// CharactersInDouble // This code is not protected by copyright and is in the public domain. // Author: Meritt Reynolds #include "CharactersInDouble.h" // CharactersInDouble // // Parse a string and return the number of characters making up the representation // of a floating point number. No conversion is done here; the goal is to identify // the existence and length of the number. The conversion is best done by system // routines like atof, sscanf, etc. // // Returns 0 if there is no valid number present. // // Does not skip white space so first character must be part of the number. If you // don't care about white space then skip it before calling CharactersInDouble. // // Example: // // while (isspace((unsigned char)*p)) p++; // n = CharactersInDouble(p); // if (n) // { // x = atof(p); // p += n; // move to unparsed part of string // } // // HOW IT WORKS // // A finite state machine with eight states is used to parse numbers that might // be in scientific notation. Exponent can be indicated by 'e' or 'E'. // Might want to generalize to 'd' and 'D' as well so old FORTRAN output // doesn't cause problems. // // Valid numbers: // 1 integer is a double // -1 negative numbers OK // +1 explicit + sign OK // 1. trailing . OK // .1 leading . OK // 1e2 exponent without . before // 1e+2 exponent with explicit + // 1e-2 exponent with - // 1.e2 exponent with . before // 1.1e1 full mantissa // 1.34e+4 general test // // There is no "looking forward". For example, this subroutine does // not forgive trailing 'e'. If it gets an 'e' it insists on a valid exponent. // // Invalid numbers: // " 1" leading white space // -a no digit after the - // a not a digit // 1e e assumed to be part of number, but exponent is missing // // Validly terminated numbers: // // 1.7a a clearly not part of the number (returns 3) // 1.e-7e 2nd e clearly not part of the number (returns 5) // // Thinking of porting this to C# one day, we do not use pointers. 
// CharactersInDouble
//
// Returns the number of characters at the start of p that form the textual
// representation of a floating point number (optional sign, mantissa with an
// optional '.', optional 'e'/'E' exponent with an optional sign), or 0 if p
// does not start with a valid number.
//
//   p       NUL-terminated string; scanning starts at p[0] and white space
//           is NOT skipped.
//   return  length of the number in characters, or 0 if none is present.
//
// The mantissa must contain at least one digit, so ".", "+." and "-." are
// rejected. Once an 'e'/'E' has been consumed a valid exponent is required:
// "1e" and "1e+" yield 0 rather than 1 (there is no look-ahead).
//
// Only the characters belonging to the number, plus the one that terminates
// it, are examined; the rest of the string is never touched.
int CharactersInDouble(const char p[])
{
    // States of the finite state machine.
    enum
    {
        ST_START,       // nothing consumed yet
        ST_SIGN,        // consumed the mantissa sign, need '.' or a digit
        ST_INTEGER,     // consuming digits of the integer part
        ST_LEAD_DOT,    // consumed '.' with no digit before it, need a digit
        ST_FRACTION,    // past the '.', mantissa already has a digit
        ST_EXP_MARK,    // consumed 'e'/'E', need a sign or a digit
        ST_EXP_SIGN,    // consumed the exponent sign, need a digit
        ST_EXP_DIGITS   // consuming digits of the exponent
    };

    int state = ST_START;
    int bytes = 0;

    for (;;)
    {
        // The terminating NUL is fed to the state machine like any other
        // character; every state either rejects it or stops on it, so the
        // loop never reads past the end of the string.
        const int c = p[bytes];

        // Plain range test: '0'..'9' are guaranteed contiguous in C, and
        // unlike isdigit() this is well defined for negative char values.
        const int digit = (c >= '0' && c <= '9');
        const int sign = (c == '+' || c == '-');
        const int expo = (c == 'e' || c == 'E');

        switch (state)
        {
        case ST_START:
            if (sign)
                state = ST_SIGN;
            else if (digit)
                state = ST_INTEGER;
            else if (c == '.')
                state = ST_LEAD_DOT;
            else
                return 0;
            break;

        case ST_SIGN:
            // A '.' right after the sign still needs a digit to follow,
            // exactly like an unsigned leading '.'.
            if (c == '.')
                state = ST_LEAD_DOT;
            else if (digit)
                state = ST_INTEGER;
            else
                return 0;
            break;

        case ST_INTEGER:
            if (expo)
                state = ST_EXP_MARK;
            else if (c == '.')
                state = ST_FRACTION;
            else if (!digit)
                return bytes;       // number ends here
            break;

        case ST_LEAD_DOT:
            if (digit)
                state = ST_FRACTION;
            else
                return 0;
            break;

        case ST_FRACTION:
            if (expo)
                state = ST_EXP_MARK;
            else if (!digit)
                return bytes;       // number ends here
            break;

        case ST_EXP_MARK:
            if (sign)
                state = ST_EXP_SIGN;
            else if (digit)
                state = ST_EXP_DIGITS;
            else
                return 0;
            break;

        case ST_EXP_SIGN:
            if (digit)
                state = ST_EXP_DIGITS;
            else
                return 0;
            break;

        default: // ST_EXP_DIGITS
            if (!digit)
                return bytes;       // number ends here
            break;
        }

        // Here the character has been accepted.
        bytes++;
    }
}