patterncMinor
Trimming whitespace from the beginning and ending of a file
Viewed 0 times
thefilewhitespacetrimmingbeginningendingandfrom
Problem
In the code below, I want to remove the whitespace character from two places only: the end of the first line and the end of the last line. How can I optimize it?
Platform : Linux , gcc compiler
```
int remove_special_chars(char file_path, char dest_file, int flag)
{
PRINT_FUN_NAME
struct stat st;
unsigned char *fileData=NULL;
FILE fp = NULL, dest_file_fp = NULL, *intermidiate_fp = NULL;
char tmp_str[256] = {0}, str_buffer[100] = {0};
int file_size = 0, count = 0, read_len = 0;
/* I have taken 3 files here "file_path" is
the source file from which i am reading
data and intermidiate_path is the 2nd file
in which iam storing data after removing
the space from 1st line. Then iam reding
the intermidiate file and removing the space
from last line and finally storing it into
3rd file i.e is "dest_file". */
if ((fp = fopen(file_path, "r")) == NULL) {
DPRINTF("File Opening Failed!\n");
return -1;
}
if((intermidiate_fp = fopen(INTERMEDIATE_FILE_PATH, "wb+")) == NULL) {
DPRINTF("File Opening Failed!\n");
return -1;
}
if((dest_file_fp = fopen(dest_file, "wb")) == NULL) {
DPRINTF("File Opening Failed!\n");
return -1;
}
while(fgets(tmp_str, sizeof(tmp_str), fp) != NULL) {
DPRINTF("tmp_str : %s\n", tmp_str);
fputs(tmp_str, intermidiate_fp);
if(count == 0) {
rewind(intermidiate_fp);
strncpy(str_buffer, tmp_strstrlen(tmp_str));
str_buffer[strlen(str_buffer) - 1] = '\0';
DPRINTF("str_buffer : %s\n", str_buffer);
fputs(str_buffer, intermidiate_fp);
count ++;
}
}
rewind(intermidiate_fp);
stat(INTERMEDIATE_FILE_PATH, &st);
if (st.st_size == 0) {
return (-2);
}
file_size = st.st_size;
DPRINTF("File size : %d\n", file_size);
fileData = (unsigned char *)mal
Platform : Linux , gcc compiler
```
int remove_special_chars(char file_path, char dest_file, int flag)
{
PRINT_FUN_NAME
struct stat st;
unsigned char *fileData=NULL;
FILE fp = NULL, dest_file_fp = NULL, *intermidiate_fp = NULL;
char tmp_str[256] = {0}, str_buffer[100] = {0};
int file_size = 0, count = 0, read_len = 0;
/* I have taken 3 files here "file_path" is
the source file from which i am reading
data and intermidiate_path is the 2nd file
in which iam storing data after removing
the space from 1st line. Then iam reding
the intermidiate file and removing the space
from last line and finally storing it into
3rd file i.e is "dest_file". */
if ((fp = fopen(file_path, "r")) == NULL) {
DPRINTF("File Opening Failed!\n");
return -1;
}
if((intermidiate_fp = fopen(INTERMEDIATE_FILE_PATH, "wb+")) == NULL) {
DPRINTF("File Opening Failed!\n");
return -1;
}
if((dest_file_fp = fopen(dest_file, "wb")) == NULL) {
DPRINTF("File Opening Failed!\n");
return -1;
}
while(fgets(tmp_str, sizeof(tmp_str), fp) != NULL) {
DPRINTF("tmp_str : %s\n", tmp_str);
fputs(tmp_str, intermidiate_fp);
if(count == 0) {
rewind(intermidiate_fp);
strncpy(str_buffer, tmp_strstrlen(tmp_str));
str_buffer[strlen(str_buffer) - 1] = '\0';
DPRINTF("str_buffer : %s\n", str_buffer);
fputs(str_buffer, intermidiate_fp);
count ++;
}
}
rewind(intermidiate_fp);
stat(INTERMEDIATE_FILE_PATH, &st);
if (st.st_size == 0) {
return (-2);
}
file_size = st.st_size;
DPRINTF("File size : %d\n", file_size);
fileData = (unsigned char *)mal
Solution
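A couple of lines bother me:
`strncpy(str_buffer, tmp_strstrlen(tmp_str));`
First, bear in mind that `strncpy` does not place a `\0` terminator at the end of the string if the buffer size limit is reached (but it does in every other case!). Thus, you have to be extremely careful when using `strncpy`.
Second, you're copying a large (256-byte) buffer into a smaller (100-byte) buffer, and you're (presumably) passing the length of the larger buffer to `strncpy`. If `tmp_str` holds a string that's too long, you'll get a buffer overflow.
`str_buffer[strlen(str_buffer) - 1] = '\0';`
When the string has length zero (e.g. the file is empty or did not have a trailing newline), the array access will be out of range. Otherwise, it will blindly delete the last character, even if it wasn't whitespace.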
The lines that I highlighted will work... if you make a couple assumptions:
-
Every line of input has
-
The "whitespace characters" we want to delete always exist.
However, it looks like you're deleting the trailing newline returned by `fgets`. I'm not even sure your code works.
If I were you, the first step I'd take is to come up with a concise specification of what you're trying to do. Correct me if I'm wrong, but it sounds like you want to do this:
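    first line: s/[\t ]$//
    last line: s/[\t ]$//
    Lines are terminated by '\n', except the last line might not have a terminator.
The `s/[\t ]$//` syntax is borrowed from sed, and means: if there's a '\t' or ' ' character at the end of the string, remove it (i.e. replace it with an empty string).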
Then, observe that this operation can be done with a trivial amount of buffering. When a newline is detected, hang on to the last character in the line, and only emit it when you're not on the first line and you know that more lines follow. Thus, you don't need an intermediate file.
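I recommend implementing a helper function with this signature:
`int remove_trailing_spaces(FILE *in, FILE *out);`
You should only need to `fread` and `fwrite` in large chunks. The `int` lets you return an error code if one of these functions fails.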
`strncpy(str_buffer, tmp_strstrlen(tmp_str));`
First, bear in mind that `strncpy` does not place a `\0` terminator at the end of the string if the buffer size limit is reached (but it does in every other case!). Thus, you have to be extremely careful when using `strncpy`.
Second, you're copying a large (256-byte) buffer into a smaller (100-byte) buffer, and you're (presumably) passing the length of the larger buffer to `strncpy`. If `tmp_str` holds a string that's too long, you'll get a buffer overflow.
`str_buffer[strlen(str_buffer) - 1] = '\0';`
When the string has length zero (e.g. the file is empty or did not have a trailing newline), the array access will be out of range. Otherwise, it will blindly delete the last character, even if it wasn't whitespace.
The lines that I highlighted will work... if you make a couple assumptions:
-
Every line of input has
-
The "whitespace characters" we want to delete always exist.
However, it looks like you're deleting the trailing newline returned by `fgets`. I'm not even sure your code works.
If I were you, the first step I'd take is to come up with a concise specification of what you're trying to do. Correct me if I'm wrong, but it sounds like you want to do this:
first line: s/[\t ]$//
last line: s/[\t ]$//
Lines are terminated by '\n', except the last line might not have a terminator.
The `s/[\t ]$//` syntax is borrowed from sed, and means: if there's a '\t' or ' ' character at the end of the string, remove it (i.e. replace it with an empty string).
Then, observe that this operation can be done with a trivial amount of buffering. When a newline is detected, hang on to the last character in the line, and only emit it when you're not on the first line and you know that more lines follow. Thus, you don't need an intermediate file.
I recommend implementing a helper function with this signature:
`int remove_trailing_spaces(FILE *in, FILE *out);`
You should only need to `fread` and `fwrite` in large chunks. The `int` lets you return an error code if one of these functions fails.
Code Snippets
strncpy(str_buffer, tmp_strstrlen(tmp_str));
str_buffer[strlen(str_buffer) - 1] = '\0';
first line: s/[\t ]$//
last line: s/[\t ]$//
Lines are terminated by '\n', except the last line might not have a terminator.
int remove_trailing_spaces(FILE *in, FILE *out);
Context
StackExchange Code Review Q#11276, answer score: 3
Revisions (0)
No revisions yet.