HiveBrain v1.2.0
Get Started
← Back to all entries
patterncMinor

`atof` revisited

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
revisitedatofstackoverflow

Problem

In an answer to this question I mentioned best effort. Here I try to explain what I meant. Please keep in mind that the implementation is intentionally incomplete (missing features such as NAN are trivial to add) and focuses specifically on the numerical stability.

It was surprisingly hard to get it right, but it paid off: the results match stdlib implementation even for the most frivolous inputs I could come up with.

However, normalize is the messiest piece of code I have ever written. It computes three results - that alone is enough to raise some brows. And memmove looks particularly out of place.

All suggestions are welcome.

```
#include
#include
#include
#include

static char drop_leading_zeroes(char start)
{
return start + strspn(start, "0");
}

static char digit_span(char start)
{
return start + strspn(start, "0123456789");
}

static int normalize(char start, char end)
{
int shift = 0;
start = drop_leading_zeroes(start);
if (isdigit(**start)) {
end = digit_span(start);
shift = end - start;
if (**end == '.') {
end = digit_span(end + 1);
memmove(start + 1, start, shift);
*start += 1;
}
} else if (**start == '.') {
*start += 1;
end = drop_leading_zeroes(start);
shift = start - end;
start = end;
end = digit_span(end);
} else {
end = start;
}
return shift;
}

static double compute_mantissa(char start, char end)
{
double result = 0.0;
while (end != start) {
result = (result + (*--end - '0')) / 10;
}
return result;
}

double my_atof(char * s)
{
bool minus = false;
switch(*s) {
case '-': minus = true;
case '+': ++s; break;
}

char * end;
int exponent = normalize(&s, &end);

double mantissa = compute_mantissa(s, end);
if (minus) {
mantissa = -mantissa;
}

if (tolower(*end) == 'e') {

Solution

Modifying the input string

The real atof() does not modify the input string and it seems wrong for yours to do so. It could lead to all sorts of unexpected behavior. For example, your program could crash if you pass in a string literal that is in a read-only section:

// Segmentation violation!
double val = my_atof("55.5");


Or you might do this and get a surprising result:

double val1 = my_atof(str);
double val2 = my_atof(str);

// val1 != val2 because the string mutated


You can make a few small changes to make your atof() work without modifying the input. First remove the lines that modify the string:

memmove(*start + 1, *start, shift);
        *start += 1;


The only purpose of those lines is to erase the '.' separating the whole part from the fractional part. Then modify compute_mantissa() to skip any '.' characters:

static double compute_mantissa(const char * start, const char * end)
{
    double result = 0.0;
    while (end != start) {
        char c = *--end;
        if (c == '.')
            continue;
        result = (result + (c - '0')) / 10;
    }
    return result;
}


The last step is to change every char to a const char , now that you aren't going to modify any strings. You can see I already did that to compute_mantissa() above.
Reduce levels of indirection

In normalize(), you operate on pointers to pointers throughout the function. I find that if you use temporary variables, you can reduce the levels of indirection and the code will be easier to read. So for example:

static int normalize(const char ** pStart, const char ** pEnd)
{
    int shift = 0;
    const char * start = *pStart;
    const char * end;

    start = drop_leading_zeroes(start);
    if (isdigit(*start)) {
        end = digit_span(start);
        shift = end - start;
        if (*end == '.') {
            end = digit_span(end + 1);
        }
    } else if (*start == '.') {
        start += 1;
        end = drop_leading_zeroes(start);
        shift = start - end;
        start = end;
        end = digit_span(end);
    } else {
        end = start;
    }
    *pStart = start;
    *pEnd   = end;
    return shift;
}

Code Snippets

// Segmentation violation!
double val = my_atof("55.5");
double val1 = my_atof(str);
double val2 = my_atof(str);

// val1 != val2 because the string mutated
memmove(*start + 1, *start, shift);
        *start += 1;
static double compute_mantissa(const char * start, const char * end)
{
    double result = 0.0;
    while (end != start) {
        char c = *--end;
        if (c == '.')
            continue;
        result = (result + (c - '0')) / 10;
    }
    return result;
}
static int normalize(const char ** pStart, const char ** pEnd)
{
    int shift = 0;
    const char * start = *pStart;
    const char * end;

    start = drop_leading_zeroes(start);
    if (isdigit(*start)) {
        end = digit_span(start);
        shift = end - start;
        if (*end == '.') {
            end = digit_span(end + 1);
        }
    } else if (*start == '.') {
        start += 1;
        end = drop_leading_zeroes(start);
        shift = start - end;
        start = end;
        end = digit_span(end);
    } else {
        end = start;
    }
    *pStart = start;
    *pEnd   = end;
    return shift;
}

Context

StackExchange Code Review Q#120453, answer score: 2

Revisions (0)

No revisions yet.