patterncMinor
`atof` revisited
Viewed 0 times
revisitedatofstackoverflow
Problem
In an answer to this question I mentioned best effort. Here I try to explain what I meant. Please keep in mind that the implementation is intentionally incomplete (missing features such as
It was surprisingly hard to get it right, but it paid off: the results match
However,
All suggestions are welcome.
```
#include
#include
#include
#include
static char drop_leading_zeroes(char start)
{
return start + strspn(start, "0");
}
static char digit_span(char start)
{
return start + strspn(start, "0123456789");
}
static int normalize(char start, char end)
{
int shift = 0;
start = drop_leading_zeroes(start);
if (isdigit(**start)) {
end = digit_span(start);
shift = end - start;
if (**end == '.') {
end = digit_span(end + 1);
memmove(start + 1, start, shift);
*start += 1;
}
} else if (**start == '.') {
*start += 1;
end = drop_leading_zeroes(start);
shift = start - end;
start = end;
end = digit_span(end);
} else {
end = start;
}
return shift;
}
static double compute_mantissa(char start, char end)
{
double result = 0.0;
while (end != start) {
result = (result + (*--end - '0')) / 10;
}
return result;
}
double my_atof(char * s)
{
bool minus = false;
switch(*s) {
case '-': minus = true;
case '+': ++s; break;
}
char * end;
int exponent = normalize(&s, &end);
double mantissa = compute_mantissa(s, end);
if (minus) {
mantissa = -mantissa;
}
if (tolower(*end) == 'e') {
NAN are trivial to add) and focuses specifically on the numerical stability.It was surprisingly hard to get it right, but it paid off: the results match
stdlib implementation even for the most frivolous inputs I could come up with.However,
normalize is the messiest piece of code I have ever written. It computes three results - that alone is enough to raise some brows. And memmove looks particularly out of place.All suggestions are welcome.
```
#include
#include
#include
#include
static char drop_leading_zeroes(char start)
{
return start + strspn(start, "0");
}
static char digit_span(char start)
{
return start + strspn(start, "0123456789");
}
static int normalize(char start, char end)
{
int shift = 0;
start = drop_leading_zeroes(start);
if (isdigit(**start)) {
end = digit_span(start);
shift = end - start;
if (**end == '.') {
end = digit_span(end + 1);
memmove(start + 1, start, shift);
*start += 1;
}
} else if (**start == '.') {
*start += 1;
end = drop_leading_zeroes(start);
shift = start - end;
start = end;
end = digit_span(end);
} else {
end = start;
}
return shift;
}
static double compute_mantissa(char start, char end)
{
double result = 0.0;
while (end != start) {
result = (result + (*--end - '0')) / 10;
}
return result;
}
double my_atof(char * s)
{
bool minus = false;
switch(*s) {
case '-': minus = true;
case '+': ++s; break;
}
char * end;
int exponent = normalize(&s, &end);
double mantissa = compute_mantissa(s, end);
if (minus) {
mantissa = -mantissa;
}
if (tolower(*end) == 'e') {
Solution
Modifying the input string
The real
Or you might do this and get a surprising result:
You can make a few small changes to make your
The only purpose of those lines is to erase the
The last step is to change every
Reduce levels of indirection
In
The real
atof() does not modify the input string and it seems wrong for yours to do so. It could lead to all sorts of unexpected behavior. For example, your program could crash if you pass in a string literal that is in a read-only section:// Segmentation violation!
double val = my_atof("55.5");Or you might do this and get a surprising result:
double val1 = my_atof(str);
double val2 = my_atof(str);
// val1 != val2 because the string mutatedYou can make a few small changes to make your
atof() work without modifying the input. First remove the lines that modify the string:memmove(*start + 1, *start, shift);
*start += 1;The only purpose of those lines is to erase the
'.' separating the whole part from the fractional part. Then modify compute_mantissa() to skip any '.' characters:static double compute_mantissa(const char * start, const char * end)
{
double result = 0.0;
while (end != start) {
char c = *--end;
if (c == '.')
continue;
result = (result + (c - '0')) / 10;
}
return result;
}The last step is to change every
char to a const char , now that you aren't going to modify any strings. You can see I already did that to compute_mantissa() above.Reduce levels of indirection
In
normalize(), you operate on pointers to pointers throughout the function. I find that if you use temporary variables, you can reduce the levels of indirection and the code will be easier to read. So for example:static int normalize(const char ** pStart, const char ** pEnd)
{
int shift = 0;
const char * start = *pStart;
const char * end;
start = drop_leading_zeroes(start);
if (isdigit(*start)) {
end = digit_span(start);
shift = end - start;
if (*end == '.') {
end = digit_span(end + 1);
}
} else if (*start == '.') {
start += 1;
end = drop_leading_zeroes(start);
shift = start - end;
start = end;
end = digit_span(end);
} else {
end = start;
}
*pStart = start;
*pEnd = end;
return shift;
}Code Snippets
// Segmentation violation!
double val = my_atof("55.5");double val1 = my_atof(str);
double val2 = my_atof(str);
// val1 != val2 because the string mutatedmemmove(*start + 1, *start, shift);
*start += 1;static double compute_mantissa(const char * start, const char * end)
{
double result = 0.0;
while (end != start) {
char c = *--end;
if (c == '.')
continue;
result = (result + (c - '0')) / 10;
}
return result;
}static int normalize(const char ** pStart, const char ** pEnd)
{
int shift = 0;
const char * start = *pStart;
const char * end;
start = drop_leading_zeroes(start);
if (isdigit(*start)) {
end = digit_span(start);
shift = end - start;
if (*end == '.') {
end = digit_span(end + 1);
}
} else if (*start == '.') {
start += 1;
end = drop_leading_zeroes(start);
shift = start - end;
start = end;
end = digit_span(end);
} else {
end = start;
}
*pStart = start;
*pEnd = end;
return shift;
}Context
StackExchange Code Review Q#120453, answer score: 2
Revisions (0)
No revisions yet.