patterncMinor
Removing any duplicate characters in a string
Viewed 0 times
removingduplicateanycharactersstring
Problem
I have piece of code in C which removes any duplicate characters in a string. But I am doing it in two loops and would like it to optimize it to one loop.
void removeDuplicates(char* ptr)
{
int end = 1;
int length = strlen(ptr);
int current;
int i;
current = 1;
for(end=1; end<length; end++)
{
for(i=0; i<end; i++)
{
if(ptr[end] == ptr[i])
break;
}
if(i == end)
{
ptr[current] = ptr[i];
current++;
}
}
ptr[current] = 0;
}
int main(int argc, char** argv)
{
char str[256] = {0,};
gets(str);
removeDuplicates(str);
printf("%s\n", str);
return 1;
}Solution
Remove Duplicates: (Re-Write Ant's code so it is readable).
#include
void removeDuplicates(char* ptr);
void removeDuplicates(char* ptr)
{
char seen[UCHAR_MAX+1] = {0};
//
// Smallest data type is a char (unless you want to bit
// twiddle, and that has been shown to be not worth it
// see the complaints about std::vector in C++).
// 256 bytes is not much.
//
// By using char as they type it prevents all the complex
// multiplication/division an bit twiddling that Ant was doing.
unsigned char* source = (unsigned char*)ptr;
unsigned char* dest = (unsigned char*)ptr;
unsigned char next;
do
{
next = *source; // Get next character.
if (!seen[next]) // Only enter the `if block` if we have not seen it.
{
seen[next] = 1; // Mark it as seen
*dest = next; // Move it to destination.
// Note: source will change faster than dest when we
// start seeing dupes, this acts as the copy back
// over the duplicates.
++dest;
}
++source;
}
while (next != '\0'); // Once we have copied the null terminator quit.
}Code Snippets
#include <stdio.h>
void removeDuplicates(char* ptr);
void removeDuplicates(char* ptr)
{
char seen[UCHAR_MAX+1] = {0};
//
// Smallest data type is a char (unless you want to bit
// twiddle, and that has been shown to be not worth it
// see the complaints about std::vector<bool> in C++).
// 256 bytes is not much.
//
// By using char as they type it prevents all the complex
// multiplication/division an bit twiddling that Ant was doing.
unsigned char* source = (unsigned char*)ptr;
unsigned char* dest = (unsigned char*)ptr;
unsigned char next;
do
{
next = *source; // Get next character.
if (!seen[next]) // Only enter the `if block` if we have not seen it.
{
seen[next] = 1; // Mark it as seen
*dest = next; // Move it to destination.
// Note: source will change faster than dest when we
// start seeing dupes, this acts as the copy back
// over the duplicates.
++dest;
}
++source;
}
while (next != '\0'); // Once we have copied the null terminator quit.
}Context
StackExchange Code Review Q#5441, answer score: 7
Revisions (0)
No revisions yet.