HiveBrain v1.2.0
Get Started
← Back to all entries
patterncsharpMinor

A super string cleaner

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
supercleanerstring

Problem

I need a method that cleans up a string according to these few rules:

  • Remove or not Spaces



  • Remove or not Special Chars



  • Remove or not Numbers



  • Decapitalize (lower-case) the string or not (this isn't really needed, but I included it anyway).



I searched Google to see whether something that already does this exists, but found nothing, so I came up with this:

```
/// <summary>
/// String clean-up helpers. <see cref="Normalize"/> can strip whitespace, fold
/// accented letters to their unaccented form, strip digits and lower-case the text.
/// </summary>
public static class StringHelper
{
    /// <summary>
    /// Maps accented letters to their unaccented counterparts. Only the lower-case
    /// pairs are listed here; the static constructor adds the upper-case pairs.
    /// </summary>
    private static readonly Dictionary<char, char> _specialChars
        = new Dictionary<char, char>
        {
            { 'á', 'a' },{ 'à', 'a' },{ 'â', 'a' },{ 'ã', 'a' },{ 'ä', 'a' },
            { 'é', 'e' },{ 'è', 'e' },{ 'ê', 'e' },{ 'ë', 'e' },
            { 'í', 'i' },{ 'ì', 'i' },{ 'î', 'i' },{ 'ï', 'i' },
            { 'ó', 'o' },{ 'ò', 'o' },{ 'õ', 'o' },{ 'ö', 'o' },
            { 'ú', 'u' },{ 'ù', 'u' },{ 'û', 'u' },{ 'ü', 'u' },
            { 'ç', 'c' },{ 'ñ', 'n' }
        };

    static StringHelper()
    {
        // Enumerate a snapshot: adding to a dictionary while enumerating it throws.
        foreach (var item in _specialChars.ToList())
        {
            // Invariant casing keeps the table independent of the current culture
            // (a Turkish culture would otherwise upper-case 'i' to 'İ').
            _specialChars.Add(
                char.ToUpperInvariant(item.Key),
                char.ToUpperInvariant(item.Value));
        }
    }

    private static readonly Regex _spaceRegex = new Regex(@"\s", RegexOptions.Compiled);

    // \d matches decimal digits; the earlier \s pattern matched whitespace instead.
    private static readonly Regex _numericRegex = new Regex(@"\d", RegexOptions.Compiled);

    /// <summary>
    /// Cleans up <paramref name="str"/> according to the flags set in
    /// <paramref name="normaliar"/>. Steps run in this order: whitespace removal,
    /// accent replacement, digit removal, lower-casing.
    /// </summary>
    /// <param name="str">The text to clean. No null check is performed here.</param>
    /// <param name="normaliar">
    /// Flags selecting the steps to apply: <c>Spaces</c> removes whitespace,
    /// <c>Special</c> replaces accented letters, <c>Numerics</c> removes digits and
    /// <c>UpperCase</c> lower-cases the result. Defaults to <c>CharType.All</c>.
    /// </param>
    /// <returns>The cleaned string.</returns>
    public static string Normalize(this string str, CharType normaliar = CharType.All)
    {
        if (normaliar.HasFlag(CharType.Spaces))
        {
            str = _spaceRegex.Replace(str, string.Empty);
        }

        if (normaliar.HasFlag(CharType.Special))
        {
            // The table is only read here, so no defensive copy is needed.
            foreach (var item in _specialChars)
            {
                str = str.Replace(item.Key, item.Value);
            }
        }

        if (normaliar.HasFlag(CharType.Numerics))
        {
            str = _numericRegex.Replace(str, string.Empty);
        }

        if (normaliar.HasFlag(CharType.UpperCase))
        {
            // Uses the current culture's casing rules, as before.
            str = str.ToLower();
        }

        return str;
    }
}

public enum CharType
{
Spaces = 1,
Special = 2,
Numerics

Solution

private static readonly Regex _numericRegex = new Regex(@"\s", RegexOptions.Compiled);


That should be \d, not \s.

Instead of

_specialChars.Add(
    item.Key.ToString().ToUpper()[0],
    item.Value.ToString().ToUpper()[0]);


You can write

_specialChars.Add(
    Char.ToUpperInvariant(item.Key),
    Char.ToUpperInvariant(item.Value));


I find it a bit cleaner, and there is less work involved.

If the text is potentially very long, you may be better off doing just one pass of the string. You would need to profile against the sort of data you have, but it could look something like this:

// Resolve each requested clean-up step once, before scanning the text.
var stripWhitespace = normaliar.HasFlag(CharType.Spaces);
var stripNumbers = normaliar.HasFlag(CharType.Numerics);
var lowerCase = normaliar.HasFlag(CharType.UpperCase);
var replaceSpecial = normaliar.HasFlag(CharType.Special);

// The output can never be longer than the input, so size the buffer up front.
var cleaned = new StringBuilder(str.Length);

foreach (var current in str)
{
    // Characters that are being stripped never reach the output buffer.
    if ((stripWhitespace && Char.IsWhiteSpace(current))
        || (stripNumbers && Char.IsNumber(current)))
    {
        continue;
    }

    var output = lowerCase ? Char.ToLowerInvariant(current) : current;

    // Swap an accented letter for its plain counterpart when one is registered.
    char mapped;
    if (replaceSpecial && _specialChars.TryGetValue(output, out mapped))
    {
        output = mapped;
    }

    cleaned.Append(output);
}

return cleaned.ToString();

Code Snippets

// Snippet 1: the declaration as posted. The pattern \s matches whitespace,
// so this regex does not match digits.
private static readonly Regex _numericRegex = new Regex(@"\s", RegexOptions.Compiled);
// Snippet 2: the posted way of adding the upper-case pairs, which converts
// each char to a string, upper-cases it and takes the first char back.
_specialChars.Add(
    item.Key.ToString().ToUpper()[0],
    item.Value.ToString().ToUpper()[0]);
// Snippet 3: the same insertion done directly on the chars.
_specialChars.Add(
    Char.ToUpperInvariant(item.Key),
    Char.ToUpperInvariant(item.Value));
// Snippet 4: single-pass version of the clean-up. The flags are read once,
// then every character is either skipped or appended to the builder.
var spaces = normaliar.HasFlag(CharType.Spaces);
var numerics = normaliar.HasFlag(CharType.Numerics);
var uppercase = normaliar.HasFlag(CharType.UpperCase);
var special = normaliar.HasFlag(CharType.Special);

var sb = new StringBuilder(str.Length);

for (var i = 0; i < str.Length; i++)
{
    var c = str[i];
    // Skip whitespace when the Spaces flag is set.
    if (spaces && Char.IsWhiteSpace(c))
    {
        continue;
    }

    // Skip numeric characters when the Numerics flag is set.
    if (numerics && Char.IsNumber(c))
    {
        continue;
    }

    // The UpperCase flag lower-cases the character.
    if (uppercase)
    {
        c = Char.ToLowerInvariant(c);
    }

    // Replace the character when the lookup table has an entry for it.
    if (special)
    {
        char replacement;
        if (_specialChars.TryGetValue(c, out replacement))
        {
            c = replacement;
        }
    }

    sb.Append(c);
}

return sb.ToString();

Context

StackExchange Code Review Q#79037, answer score: 4

Revisions (0)

No revisions yet.