HiveBrain v1.2.0
Get Started
← Back to all entries
snippetcMinor

C program to convert string to floating point

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
pointconvertfloatingprogramstring

Problem

From K&R:


Extend atof to handle scientific notation of the form
123.45e-6
where a floating-point number may be followed by e or E and an optionally
signed exponent.

I only added the part that handles the scientific notation. It compiles and works fine. I think it's way too long and could be shortened. I also don't like the names.

#include 
#include 
#include 
#define MAX_SIZE 20

double atof(const char *s){
    int i;
    for(i = 0; isspace(s[i]); ++i);
        /*skip white space*/

    int sign;
    sign = (s[i] == '-')? -1 : 1; /*The sign of the number*/

    if(s[i] == '-' || s[i] == '+'){
        ++i;
    }

    double value;
    for(value = 0.0; isdigit(s[i]); ++i){
        value = value * 10.0 + (s[i] - '0');
    }

    if(s[i] == '.'){
        ++i;
    }

    double power;
    for(power = 1.0; isdigit(s[i]); ++i){
        value = value * 10.0 + (s[i] - '0');
        power *= 10.0;
    }

    if(s[i] == 'e' || s[i] == 'E'){
        ++i;
    }
    else{
        return sign * value/power;
    }

    int powersign; /*The sign following the E*/
    powersign = (s[i] == '-')? -1 : 1;

    if(s[i] == '-' || s[i] == '+'){
        ++i;
    }

    int power2; /*The number following the E*/
    for(power2 = 0; isdigit(s[i]); ++i){
        power2 = power2 * 10 + (s[i] - '0');
    }

    if(powersign == -1){
        while(power2 != 0){
            power *= 10;
            --power2;
        }
    }
    else{
        while(power2 != 0){
            power /= 10;
            --power2;
        }
    }

    return sign * value/power;
}

int main(void){
    char string[MAX_SIZE];

    fgets(string, sizeof(string), stdin);

    printf("%.9f", atof(string));

    return 0;
}

Solution

Thanks for including a test program - that's always valuable!

However, I'm going to change it, to parse a battery of test cases instead of reading from stdin:

#include 
int main(void)
{
    static const char *const strings[] = {
        /* these should parse fully */
        "12",
        "12.0",
        "08",                   /* not octal! */
        "+12.34",
        ".34",
        "\t \n2.",
        "1e0",
        "1e+0",
        "1e-0",
        "1.e4",
        ".1e-4",
        "-5e006",
        "-5e+16",
        "-.05",
        "-.0",
        "-1e6",
        /* these should parse only the initial part */
        "5c5",
        "10ee5",
        "0x06",                 /* not hex! */
        "--1" ,
        "-+1" ,
        "1e--4" ,
        "-1e.4",
        "1e 4",
        "1e-g",
        "", "foobar",           /* both 0 */
        " e5",                  /* also 0 */
        "-1e6",
        /* overflow/underflow */
        "1e500000",
        "1e-500000",
        "-1e500000",
        "-1e-500000",
    };

    static const int max = sizeof strings / sizeof strings[0];
    for (int i = 0;  i  %.9g\n", strings[i], extended_atof(strings[i]));
}


(I changed the function name to extended_atof() so as to be safely distinct from the standard library atof().)

Your implementation passes all these tests. Now we can look at refactoring.

Remove duplication

The things that we parse in multiple places are:

  • optional sign + or -



  • digit sequences



So perhaps we can refactor each of those into a function? Instead of using an integer index into the supplied string, I prefer to just move the string pointer, and eliminate the need for i:

/* return true for positive, false for negative,
   and advance `*s` to next position */
static bool parse_sign(const char **s)
{
    switch (**s) {
    case '-': ++*s; return false;
    case '+': ++*s; return true;
    default: return true;
    }
}


Let's make use of that in the function:

double extended_atof(const char *s)
{
    /*skip white space*/
    while (isspace(*s))
        ++s;

    int sign = parse_sign(&s) ? 1 : -1; /*The sign of the number*/

    double value = 0.0;
    while (isdigit(*s))
        value = value * 10.0 + (*s++ - '0');

    if (*s == '.') {
        ++s;
    }

    double power = 1.0;
    while (isdigit(*s)) {
        value = value * 10.0 + (*s++ - '0');
        power *= 10.0;
    }

    if (tolower(*s) == 'e') {
        ++s;
    } else {
        return sign * value/power;
    }

    bool powersign = parse_sign(&s); /*The sign following the E*/

    int power2 = 0.0; /*The number following the E*/
    while (isdigit(*s))
        power2 = power2 * 10.0 + (*s++ - '0');

    if (powersign) {
        while (power2 != 0) {
            power /= 10;
            --power2;
        }
    } else {
        while (power2 != 0) {
            power *= 10;
            --power2;
        }
    }

    return sign * value/power;
}


It's slightly shorter, and it still passes all the tests.

Let's see if we can read digit strings in a function, and replace the three places we do that. We'll make it update a count of how many digits wore parsed, so we don't lose leading zeros in the fractional part:

double extended_atof(const char *s)
{
    /*skip white space*/
    while (isspace(*s))
        ++s;

    int sign = parse_sign(&s) ? 1 : -1; /*The sign of the number*/

    double value = parse_digits(&s, NULL);

    if (*s == '.') {
        ++s;
        int d;                  /* digits in fraction */
        double fraction = parse_digits(&s, &d);
        while (d--)
            fraction /= 10.0;
        value += fraction;
    }

    value *= sign;

    if (tolower(*s) == 'e') {
        ++s;
    } else {
        return value;
    }

    bool powersign = parse_sign(&s); /*The sign following the E*/

    int power2 = parse_digits(&s, NULL); /*The number following the E*/

    double power = 1.0;
    if (powersign) {
        while (power2 != 0) {
            power /= 10;
            --power2;
        }
    } else {
        while (power2 != 0) {
            power *= 10;
            --power2;
        }
    }

    return value/power;
}


Tests still pass; what's next?

if (tolower(*s) == 'e') {
    ++s;
} else {
    return value;
}


This can be reversed, and if we're returning, it doesn't matter what we do to s:

if (tolower(*s++) != 'e')
    return value;


Here's some near-duplicate blocks:

double power = 1.0;
if (powersign) {
    while (power2 != 0) {
        power /= 10;
        --power2;
    }
} else {
    while (power2 != 0) {
        power *= 10;
        --power2;
    }
}


Dividing by 10 is the same as multiplying by 0.1, so we can move the test into the loop:

double power = 1.0;
while (power2 != 0) {
    power *= powersign ? 0.1 : 10;
    --power2;
}


We could go further, and capture powersign ? 0.1 : 10 into a variable. We can also eliminate the power variable from here, and multiply value

Code Snippets

#include <stdio.h>
int main(void)
{
    static const char *const strings[] = {
        /* these should parse fully */
        "12",
        "12.0",
        "08",                   /* not octal! */
        "+12.34",
        ".34",
        "\t \n2.",
        "1e0",
        "1e+0",
        "1e-0",
        "1.e4",
        ".1e-4",
        "-5e006",
        "-5e+16",
        "-.05",
        "-.0",
        "-1e6",
        /* these should parse only the initial part */
        "5c5",
        "10ee5",
        "0x06",                 /* not hex! */
        "--1" ,
        "-+1" ,
        "1e--4" ,
        "-1e.4",
        "1e 4",
        "1e-g",
        "", "foobar",           /* both 0 */
        " e5",                  /* also 0 */
        "-1e6",
        /* overflow/underflow */
        "1e500000",
        "1e-500000",
        "-1e500000",
        "-1e-500000",
    };

    static const int max = sizeof strings / sizeof strings[0];
    for (int i = 0;  i < max;  ++i)
        printf("%20s = > %.9g\n", strings[i], extended_atof(strings[i]));
}
/* return true for positive, false for negative,
   and advance `*s` to next position */
static bool parse_sign(const char **s)
{
    switch (**s) {
    case '-': ++*s; return false;
    case '+': ++*s; return true;
    default: return true;
    }
}
double extended_atof(const char *s)
{
    /*skip white space*/
    while (isspace(*s))
        ++s;

    int sign = parse_sign(&s) ? 1 : -1; /*The sign of the number*/

    double value = 0.0;
    while (isdigit(*s))
        value = value * 10.0 + (*s++ - '0');

    if (*s == '.') {
        ++s;
    }

    double power = 1.0;
    while (isdigit(*s)) {
        value = value * 10.0 + (*s++ - '0');
        power *= 10.0;
    }

    if (tolower(*s) == 'e') {
        ++s;
    } else {
        return sign * value/power;
    }

    bool powersign = parse_sign(&s); /*The sign following the E*/

    int power2 = 0.0; /*The number following the E*/
    while (isdigit(*s))
        power2 = power2 * 10.0 + (*s++ - '0');

    if (powersign) {
        while (power2 != 0) {
            power /= 10;
            --power2;
        }
    } else {
        while (power2 != 0) {
            power *= 10;
            --power2;
        }
    }

    return sign * value/power;
}
double extended_atof(const char *s)
{
    /*skip white space*/
    while (isspace(*s))
        ++s;

    int sign = parse_sign(&s) ? 1 : -1; /*The sign of the number*/

    double value = parse_digits(&s, NULL);

    if (*s == '.') {
        ++s;
        int d;                  /* digits in fraction */
        double fraction = parse_digits(&s, &d);
        while (d--)
            fraction /= 10.0;
        value += fraction;
    }

    value *= sign;

    if (tolower(*s) == 'e') {
        ++s;
    } else {
        return value;
    }

    bool powersign = parse_sign(&s); /*The sign following the E*/

    int power2 = parse_digits(&s, NULL); /*The number following the E*/

    double power = 1.0;
    if (powersign) {
        while (power2 != 0) {
            power /= 10;
            --power2;
        }
    } else {
        while (power2 != 0) {
            power *= 10;
            --power2;
        }
    }

    return value/power;
}
if (tolower(*s) == 'e') {
    ++s;
} else {
    return value;
}

Context

StackExchange Code Review Q#158519, answer score: 4

Revisions (0)

No revisions yet.