patterncMinor
Defensive programming in C
Viewed 0 times
programmingdefensivestackoverflow
Problem
I wrote a function (
```
#include
#include
#include
// Check whether the two values are equal, and print an error if they are not.
void AssertEq(int lhs, int rhs, int line) {
if (lhs != rhs)
printf("Fail: %d != %d (line %d)", lhs, rhs, line);
}
#define ASSERT_EQ(lhs, rhs) do { AssertEq(lhs, rhs, __LINE__); } while (0);
// Given a valid C string pointer, find the index of the last character that
// is not whitespace. If str points to an empty string, return -1.
int find_index_of_last_nonwhitespace(char const* str) {
assert(str && "str must point to a valid C string");
int const length = strlen(str);
// We subtract 1 to skip the null terminator. Seeing as we check p >= str
// before we do anything else, this should be okay even for a str that is empty.
char const* p = str + length - 1;
while (p >= str && *p == ' ')
--p;
return p - str;
}
// Return the index of the beginning last word in the given C string. If the string
// is empty, return 0.
int find_index_of_beginning_of_last_word(char const* str) {
assert(str && "str must point to a valid C string");
int end_of_last_word = find_index_of_last_nonwhitespace(str);
// Subtract 1 so that we have the index of the first letter
char const* p = str + end_of_last_word;
while (p >= str && *p != ' ')
--p;
return p - str + 1; // To compensate for this being the index prior to the word.
}
// Given a destination buffer and a source C string pointer, copy the source
// into the destination until a space or the end of the string is hit.
// The buffer must be large enough to store the word and a \0 character after it.
// If dest == src, simply truncate after the first word.
void wordcpy(char dest, char const src) {
assert(src && "src must point to a valid C st
get_last_word_of) that takes a C string and a destination buffer, and then copies the last word into the buffer. The code was originally C++, and was changed into C later.```
#include
#include
#include
// Check whether the two values are equal, and print an error if they are not.
void AssertEq(int lhs, int rhs, int line) {
if (lhs != rhs)
printf("Fail: %d != %d (line %d)", lhs, rhs, line);
}
#define ASSERT_EQ(lhs, rhs) do { AssertEq(lhs, rhs, __LINE__); } while (0);
// Given a valid C string pointer, find the index of the last character that
// is not whitespace. If str points to an empty string, return -1.
int find_index_of_last_nonwhitespace(char const* str) {
assert(str && "str must point to a valid C string");
int const length = strlen(str);
// We subtract 1 to skip the null terminator. Seeing as we check p >= str
// before we do anything else, this should be okay even for a str that is empty.
char const* p = str + length - 1;
while (p >= str && *p == ' ')
--p;
return p - str;
}
// Return the index of the beginning last word in the given C string. If the string
// is empty, return 0.
int find_index_of_beginning_of_last_word(char const* str) {
assert(str && "str must point to a valid C string");
int end_of_last_word = find_index_of_last_nonwhitespace(str);
// Subtract 1 so that we have the index of the first letter
char const* p = str + end_of_last_word;
while (p >= str && *p != ' ')
--p;
return p - str + 1; // To compensate for this being the index prior to the word.
}
// Given a destination buffer and a source C string pointer, copy the source
// into the destination until a space or the end of the string is hit.
// The buffer must be large enough to store the word and a \0 character after it.
// If dest == src, simply truncate after the first word.
void wordcpy(char dest, char const src) {
assert(src && "src must point to a valid C st
Solution
Found some inputs that could cause wordcpy to perform undefined behaviour. See below.
Enough comments? Pretty close, though some needed tweaking.
What IS lacking is some definition of what is meant by "word", "space", "whitespace" especially as they relate to the presence of punctuation, tabs, newlines, etc.
As for ASSERT_EQ, I'm pretty sure you need separate per-type macros, functions, and format strings in C.
size_t would probably be cleaner for all lengths and offsets, but I don't know of any specific environments where int would be an actual issue.
Are the tests sufficient? Cases missed? I added a few and suggested the shape of a few more.
Are some unnecessary? You never know when you'll break an edge case.
I didn't compile anything, so consider all mods to be c-like pseudo code.
OP had printf(...
OP had ...If str points to an empty string, return -1.
OP had while (p >= str && *p == ' ')
OP had ...index of the beginning last word ... is empty, return 0.
Design Note: It seems a little strange that you get the same 0 result for inputs "abc" and " "
OP had // Subtract 1 so that we have the index of the first letter
(Comment removed -- no subtraction in sight)
Suggestion: add a reassuring comment around here about what happens for an empty/blank input.
OP had while (p >= str && *p != ' ')
Needs buffer overlap validation -- passed a dest pointer between src+1 and the last character in the first word of src, this will loop forever trashing memory.
Do we really mean "space" or general "white space" including \n \t, etc.? Up until now, I had been assuming "white space" as defined by isspace, so I'm following through, here.
OP had for ( ; s != '\0' && s != ' '; ++s, ++d)
Consistency in argument ordering (and argument naming? dest/output src/input)
between wrdcpy and this function might reduce caller confusion and might improve readability.
```
// Given a pointer to a C string, and a pointer to an output buffer that is at least
// as large as the last word in the input plus one, copy the last word of the input
// into the output buffer.
void get_last_word_of(char const input, char output) {
assert(input && "input must be a valid C string");
assert(output && "output must be a valid buffer");
int index_of_last_word = find_index_of_beginning_of_last_word(input);
wordcpy(output, input + index_of_last_word);
}
int main() {
ASSERT_EQ(find_index_of_last_nonwhitespace("Test "), 3);
ASSERT_EQ(find_index_of_last_nonwhitespace("Test"), 3);
ASSERT_EQ(find_index_of_last_nonwhitespace("Te st "), 4);
ASSERT_EQ(find_index_of_last_nonwhitespace("Te st"), 4);
ASSERT_EQ(find_index_of_last_nonwhitespace(""), -1);
ASSERT_EQ(find_index_of_last_nonwhitespace(" "), -1);
ASSERT_EQ(find_index_of_beginning_of_last_word("Test"), 0);
ASSERT_EQ(find_index_of_beginning_of_last_word("Test "), 0);
ASSERT_EQ(find_index_of_beginning_of_last_word("
Enough comments? Pretty close, though some needed tweaking.
What IS lacking is some definition of what is meant by "word", "space", "whitespace" especially as they relate to the presence of punctuation, tabs, newlines, etc.
As for ASSERT_EQ, I'm pretty sure you need separate per-type macros, functions, and format strings in C.
size_t would probably be cleaner for all lengths and offsets, but I don't know of any specific environments where int would be an actual issue.
Are the tests sufficient? Cases missed? I added a few and suggested the shape of a few more.
Are some unnecessary? You never know when you'll break an edge case.
I didn't compile anything, so consider all mods to be c-like pseudo code.
#include
#include
#include
// Check whether the two values are equal, and print an error if they are not.
void AssertEq(int lhs, int rhs, int line) {
if (lhs != rhs)OP had printf(...
fprintf(stderr, "Fail: %d != %d (line %d)", lhs, rhs, line);
}
#define ASSERT_EQ(lhs, rhs) do { AssertEq(lhs, rhs, __LINE__); } while (0);
// Given a valid C string pointer, find the index of the last character thatOP had ...If str points to an empty string, return -1.
// is not whitespace. If str points to an empty or all-whitespace string, return -1.
int find_index_of_last_nonwhitespace(char const* str) {
assert(str && "str must point to a valid C string");
int const length = strlen(str);
// We subtract 1 to skip the null terminator. Seeing as we check p >= str
// before we do anything else, this should be okay even for a str that is empty.
char const* p = str + length - 1;OP had while (p >= str && *p == ' ')
while (p >= str && isspace(*p))
--p;
return p - str;
}OP had ...index of the beginning last word ... is empty, return 0.
// Return the index of the beginning of the last word in the given C string. If the string
// is empty or all whitespace, return 0.Design Note: It seems a little strange that you get the same 0 result for inputs "abc" and " "
int find_index_of_beginning_of_last_word(char const* str) {
assert(str && "str must point to a valid C string");
int end_of_last_word = find_index_of_last_nonwhitespace(str);OP had // Subtract 1 so that we have the index of the first letter
(Comment removed -- no subtraction in sight)
Suggestion: add a reassuring comment around here about what happens for an empty/blank input.
char const* p = str + end_of_last_word;OP had while (p >= str && *p != ' ')
while (p >= str && ! isspace(*p))
--p;
return p - str + 1; // To compensate for this being the index prior to the word.
}Needs buffer overlap validation -- passed a dest pointer between src+1 and the last character in the first word of src, this will loop forever trashing memory.
// Given a destination buffer and a source C string pointer, copy the source
// into the destination until a space or the end of the string is hit.
// The buffer must be large enough to store the word and a \0 character after it.
// If dest == src, simply truncate after the first word.
void wordcpy(char* dest, char const* src) {
assert(src && "src must point to a valid C string");
assert(dest && "dest must point to a valid buffer");
char* d = dest;
char const* s = src;Do we really mean "space" or general "white space" including \n \t, etc.? Up until now, I had been assuming "white space" as defined by isspace, so I'm following through, here.
OP had for ( ; s != '\0' && s != ' '; ++s, ++d)
for ( ; *s != '\0' && ! isspace(*s); ++s, ++d)
*d = *s;
*d = '\0';
}Consistency in argument ordering (and argument naming? dest/output src/input)
between wrdcpy and this function might reduce caller confusion and might improve readability.
```
// Given a pointer to a C string, and a pointer to an output buffer that is at least
// as large as the last word in the input plus one, copy the last word of the input
// into the output buffer.
void get_last_word_of(char const input, char output) {
assert(input && "input must be a valid C string");
assert(output && "output must be a valid buffer");
int index_of_last_word = find_index_of_beginning_of_last_word(input);
wordcpy(output, input + index_of_last_word);
}
int main() {
ASSERT_EQ(find_index_of_last_nonwhitespace("Test "), 3);
ASSERT_EQ(find_index_of_last_nonwhitespace("Test"), 3);
ASSERT_EQ(find_index_of_last_nonwhitespace("Te st "), 4);
ASSERT_EQ(find_index_of_last_nonwhitespace("Te st"), 4);
ASSERT_EQ(find_index_of_last_nonwhitespace(""), -1);
ASSERT_EQ(find_index_of_last_nonwhitespace(" "), -1);
ASSERT_EQ(find_index_of_beginning_of_last_word("Test"), 0);
ASSERT_EQ(find_index_of_beginning_of_last_word("Test "), 0);
ASSERT_EQ(find_index_of_beginning_of_last_word("
Code Snippets
#include <assert.h>
#include <stdio.h>
#include <string.h>
// Check whether the two values are equal, and print an error if they are not.
void AssertEq(int lhs, int rhs, int line) {
if (lhs != rhs)fprintf(stderr, "Fail: %d != %d (line %d)", lhs, rhs, line);
}
#define ASSERT_EQ(lhs, rhs) do { AssertEq(lhs, rhs, __LINE__); } while (0);
// Given a valid C string pointer, find the index of the last character that// is not whitespace. If str points to an empty or all-whitespace string, return -1.
int find_index_of_last_nonwhitespace(char const* str) {
assert(str && "str must point to a valid C string");
int const length = strlen(str);
// We subtract 1 to skip the null terminator. Seeing as we check p >= str
// before we do anything else, this should be okay even for a str that is empty.
char const* p = str + length - 1;while (p >= str && isspace(*p))
--p;
return p - str;
}// Return the index of the beginning of the last word in the given C string. If the string
// is empty or all whitespace, return 0.Context
StackExchange Code Review Q#7731, answer score: 4
Revisions (0)
No revisions yet.