patterncMinor
Finding strings inside of other strings in order in C
Viewed 0 times
orderfindingotherstringsinside
Problem
A question on Stack Overflow recently intrigued me to the point of me implementing the functionality in C. Can you critique this algorithm and tell me what is good and bad about it?
This program will tell you if a string is inside of another string.
For example:
```
#include
#include
#include
#include
/*
const char const read only
const char is read/write
*/
int find_str_in_str(const char const base, const char const sub)
{
int base_len = strlen(base);
int sub_len = strlen(sub);
char *tmp_sub = NULL;
/ allocate enough mem for the max string length /
if(base_len > sub_len) {
tmp_sub = malloc(base_len + 1);
}
else {
tmp_sub = malloc(sub_len + 1);
}
if(NULL == tmp_sub) {
fprintf(stderr, "Runtime error (malloc)\n");
exit(1);
}
int i = 0;
int j = 0;
for(; i < sub_len; i++) {
for(; j < base_len; j++) {
if(base[j] == sub[i]) {
tmp_sub[i] = base[j];
/ the first occurance was found /
break;
}
}
}
tmp_sub[i++] = '\0';
if(0 == strcmp(sub, tmp_sub)) {
free(tmp_sub);
return 1;
} else {
free(tmp_sub);
return 0;
}
}
int main(int argc, char **argv)
{
if(argc < 3) {
fprintf(stderr, "Usage: %s %s %s\n", argv[0], "base", "derived");
return EXIT_FAILURE;
}
fprintf(stdout, "Base : %s\n", argv[0]);
fprintf(stdout, "Sub : %s\n", argv[1]);
if(1 == find_str_in_str(argv[1], argv[2])) {
This program will tell you if a string is inside of another string.
For example:
- "elf" would be inside "self"
- "hit" would be inside "chemistry"
- "tim" would not be inside "chemistry"
- "try" would be inside "chemistry"
```
#include
#include
#include
#include
/*
const char const read only
const char is read/write
*/
int find_str_in_str(const char const base, const char const sub)
{
int base_len = strlen(base);
int sub_len = strlen(sub);
char *tmp_sub = NULL;
/ allocate enough mem for the max string length /
if(base_len > sub_len) {
tmp_sub = malloc(base_len + 1);
}
else {
tmp_sub = malloc(sub_len + 1);
}
if(NULL == tmp_sub) {
fprintf(stderr, "Runtime error (malloc)\n");
exit(1);
}
int i = 0;
int j = 0;
for(; i < sub_len; i++) {
for(; j < base_len; j++) {
if(base[j] == sub[i]) {
tmp_sub[i] = base[j];
/ the first occurance was found /
break;
}
}
}
tmp_sub[i++] = '\0';
if(0 == strcmp(sub, tmp_sub)) {
free(tmp_sub);
return 1;
} else {
free(tmp_sub);
return 0;
}
}
int main(int argc, char **argv)
{
if(argc < 3) {
fprintf(stderr, "Usage: %s %s %s\n", argv[0], "base", "derived");
return EXIT_FAILURE;
}
fprintf(stdout, "Base : %s\n", argv[0]);
fprintf(stdout, "Sub : %s\n", argv[1]);
if(1 == find_str_in_str(argv[1], argv[2])) {
Solution
Allocating
Using this fact, the entire algorithm can be simplified to remove the
In your nested for loops, remove the
Insufficient karma to comment @William Morris
I had returned to write nearly what you have provided as a higher performance / more obscure solution as well. If you exchange the body of your for loop for the obscure trick I had mentioned above (but have now move below) you can avoid one more logic branch in your code.
Feel free to edit my answer and remove this lower block if you choose to update your answer.
tmp_sub only needs to be the size of sub: if sub is found in base then it will be exactly the same length.Using this fact, the entire algorithm can be simplified to remove the
tmp_sub and calls to strcomp entirely.In your nested for loops, remove the
i loop and continue incrementing j each iteration. Increment i iff sub[i] == base[j]. Then, check i. If i == sub_len return true. If the loop exits (where j is equal to base_len) then return false.int find_str_in_str(const char* const base, const char* const sub)
{
int base_len = strlen(base);
int sub_len = strlen(sub);
int i = 0;
int j = 0;
for ( ; j < base_len; j++) {
if (sub[i] == base[j]) i++;
if (i == sub_len) return 1; // we found all sub chars in order
}
return 0; // we reached the end of base without finding all chars
}Insufficient karma to comment @William Morris
I had returned to write nearly what you have provided as a higher performance / more obscure solution as well. If you exchange the body of your for loop for the obscure trick I had mentioned above (but have now move below) you can avoid one more logic branch in your code.
static int find_str_in_str(const char *base, const char *sub)
{
// Check arguments for empty string
for (; *base != '\0' && *sub != '\0'; ++base) {
sub += (int)(*base == *sub);
}
return *sub == '\0';
}Feel free to edit my answer and remove this lower block if you choose to update your answer.
Code Snippets
int find_str_in_str(const char* const base, const char* const sub)
{
int base_len = strlen(base);
int sub_len = strlen(sub);
int i = 0;
int j = 0;
for ( ; j < base_len; j++) {
if (sub[i] == base[j]) i++;
if (i == sub_len) return 1; // we found all sub chars in order
}
return 0; // we reached the end of base without finding all chars
}static int find_str_in_str(const char *base, const char *sub)
{
// Check arguments for empty string
for (; *base != '\0' && *sub != '\0'; ++base) {
sub += (int)(*base == *sub);
}
return *sub == '\0';
}Context
StackExchange Code Review Q#67741, answer score: 3
Revisions (0)
No revisions yet.