HiveBrain v1.2.0
Get Started
← Back to all entries
patterncMinor

C string library inspired by Python string functions

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
pythoninspiredlibraryfunctionsstring

Problem

I'm working on a C library which attempts to bring some of the Python string functions over to C.

This is where my full source is located, but the majority of what I have so far is here.

Are there any places where code efficiency can be improved?

#ifndef OCTO_H
#define OCTO_H

#include 
#include 

int len(char* string){

    int i = 0;
    while(*string != '\0'){
        i++;
        *string++;
    }

    return i;

}

char* lstrip(char* string){

    if(!string) 
        return;

    /* Trim off leading whitespace */
    while(*string == ' '){
        string++;
    }

    return string;

}

char* rstrip(char* string){

    /* Trim off trailing whitespace */
    char* end = string + len(string) - 1;
        while (end >= string && *end == ' ')
                end--;
        *(end + 1) = '\0';

    return string;

}

char* strip(char* string){

    /* Trim off both leading and trailing whitespace */
    char* lstring = lstrip(string);
    char* finalString = rstrip(lstring);

    return finalString;

}

char* slice(char* s, int start, int end){

    /* Create a new identical buffer */
    char* buff = (char*) malloc((end - start) + 2);
    strncpy(buff, s + start, (end - start) + 1);
    *(buff + (end - start) + 1) = '\0';
    return buff;
}

char* toUpperCase(char* s){

}

char* toLowerCase(char* s){

}

#endif /* OCTO_H */

Solution

Well efficiency can be improved by using the built-in string functions.

They have been optimized at the assembly level for most implementations.

Assuming you were writing len() in C rather than using some optimal assemble instructions. The canonical form looks more like this (though I am sure it can be done better). Though there is strlen() which does the same thing.

int len(char* string)
{
    char *end = string;
    for(;*end;++end)  /* Notice nothing here */;
    return end-string;
}


Looking at this:

char* lstrip(char* string){

    /* This makes it non optimal for strings we already know are good.
     * If I already know my string is fine I would want a function that
     * did not do this test.
     *
     * Of course you can then have a second function that does the test
     * then calls the non checked version (and if it is short the compiler
     * will inline
     */
    if(!string)
        /* Not returning a value from a function expecting a value is an error
         * Check the warning messages generated by the compiler
         */      
        return;

    /*
     * ' ' is not white space it is just a space character
     *    What you are looking for is isspace(c)
     */    
    while(*string == ' '){
        string++;
    }

    return string;
}


I would have written like this:

char* lstripNT(char* string){ return string?lstrip(string):NULL;}
char* lstrip(char* string){
    for(;isspace(string);++string) /*Nothing here*/;
    return string;

    // NOTE: there is an issue with this technique in that you
    //       are not returning the original string.
    //       If the string was dynamically allocated
    //       you now have issues with freeing it. Which means you
    //       specifically need to keep a copy of the original pointer
    //       for the purpose of freeing it when you are finished.
    //       This is NOT a good idea.
}


Because of the problem with dynamic allocation I would probably write like this:

char* lstrip(char* string)
{
    size_t  move = 0;
    char*   start= string;
    for(;isspace(start);++start,++move)
    {}
    if (move)
    {
        // Move all the characters down.
        // So we basically squish the white space.
        for(;*start;++start)
        {
            *(start-move) = *start;
        }
    }
    return string;
}


Right trim. Is not really optimal because you scan to the end (using len). Then you then scan backwards until you find a non space character. You can do this in a single traverse. Also you are not being consistent. lstrip() checks for NULL while this does not.

char* rstripNT(char* string){ return string?rstrip(string):NULL;}
char* rstrip(char* string){

      char* loop    = string;
      char* last    = NULL;
      int   isSpace = false;
      while(*loop)
      {
          isSpace = TRUE;
          last    = loop;
          for(;*loop && isspace(*loop);++loop)
          {}
          if (*loop)
          {
              isSpace = FALSE;
              for(;*loop && !isspace(*loop);++loop)
              {}
          }
      }
      if (isSpace)
      {
          *last = '\0';
      }
      return string;
}


The strip.

char* strip(char* string){

    /* Trim off both leading and trailing whitespace */
    char* lstring = lstrip(string);
    char* finalString = rstrip(lstring);

    return finalString;

}


This is conceptually what you want to do.

But I don't think that will be optimal way to do it. I would write this as a single pass.

Code Snippets

int len(char* string)
{
    char *end = string;
    for(;*end;++end)  /* Notice nothing here */;
    return end-string;
}
char* lstrip(char* string){


    /* This makes it non optimal for strings we already know are good.
     * If I already know my string is fine I would want a function that
     * did not do this test.
     *
     * Of course you can then have a second function that does the test
     * then calls the non checked version (and if it is short the compiler
     * will inline
     */
    if(!string)
        /* Not returning a value from a function expecting a value is an error
         * Check the warning messages generated by the compiler
         */      
        return;

    /*
     * ' ' is not white space it is just a space character
     *    What you are looking for is isspace(c)
     */    
    while(*string == ' '){
        string++;
    }

    return string;
}
char* lstripNT(char* string){ return string?lstrip(string):NULL;}
char* lstrip(char* string){
    for(;isspace(string);++string) /*Nothing here*/;
    return string;

    // NOTE: there is an issue with this technique in that you
    //       are not returning the original string.
    //       If the string was dynamically allocated
    //       you now have issues with freeing it. Which means you
    //       specifically need to keep a copy of the original pointer
    //       for the purpose of freeing it when you are finished.
    //       This is NOT a good idea.
}
char* lstrip(char* string)
{
    size_t  move = 0;
    char*   start= string;
    for(;isspace(start);++start,++move)
    {}
    if (move)
    {
        // Move all the characters down.
        // So we basically squish the white space.
        for(;*start;++start)
        {
            *(start-move) = *start;
        }
    }
    return string;
}
char* rstripNT(char* string){ return string?rstrip(string):NULL;}
char* rstrip(char* string){

      char* loop    = string;
      char* last    = NULL;
      int   isSpace = false;
      while(*loop)
      {
          isSpace = TRUE;
          last    = loop;
          for(;*loop && isspace(*loop);++loop)
          {}
          if (*loop)
          {
              isSpace = FALSE;
              for(;*loop && !isspace(*loop);++loop)
              {}
          }
      }
      if (isSpace)
      {
          *last = '\0';
      }
      return string;
}

Context

StackExchange Code Review Q#29815, answer score: 5

Revisions (0)

No revisions yet.