patterncsharpMinor
A super string cleaner
Viewed 0 times
supercleanerstring
Problem
I need a method that clean up a string considering these few rules:
I did some google if exists something that already do this, but found nothing, so I came up with this:
```
public static class StringHelper
{
private static readonly Dictionary _specialChars
= new Dictionary
{
{ 'á', 'a' },{ 'à', 'a' },{ 'â', 'a' },{ 'ã', 'a' },{ 'ä', 'a' },
{ 'é', 'e' },{ 'è', 'e' },{ 'ê', 'e' },{ 'ë', 'e' },
{ 'í', 'i' },{ 'ì', 'i' },{ 'î', 'i' },{ 'ï', 'i' },
{ 'ó', 'o' },{ 'ò', 'o' },{ 'õ', 'o' },{ 'ö', 'o' },
{ 'ú', 'u' },{ 'ù', 'u' },{ 'û', 'u' },{ 'ü', 'u' },
{ 'ç', 'c' },{ 'ñ', 'n' }
};
static StringHelper()
{
foreach (var item in _specialChars.ToList())
{
_specialChars.Add(
item.Key.ToString().ToUpper()[0],
item.Value.ToString().ToUpper()[0]);
}
}
private static readonly Regex _spaceRegex = new Regex(@"\s", RegexOptions.Compiled);
private static readonly Regex _numericRegex = new Regex(@"\s", RegexOptions.Compiled);
public static string Normalize(this string str, CharType normaliar = CharType.All)
{
if (normaliar.HasFlag(CharType.Spaces))
{
str = _spaceRegex.Replace(str, string.Empty);
}
if (normaliar.HasFlag(CharType.Special))
{
str = _specialChars.ToList()
.Aggregate(str, (ac, item) => ac.Replace(item.Key, item.Value));
}
if (normaliar.HasFlag(CharType.Numerics))
{
str = _numericRegex.Replace(str, string.Empty);
}
if (normaliar.HasFlag(CharType.UpperCase))
{
str = str.ToLower();
}
return str;
}
}
public enum CharType
{
Spaces = 1,
Special = 2,
Numerics
- Remove or not Spaces
- Remove or not Special Chars
- Remove or not Numbers
- Descaptalize or not the string (this is don't really needed but I included it anyway).
I did some google if exists something that already do this, but found nothing, so I came up with this:
```
public static class StringHelper
{
private static readonly Dictionary _specialChars
= new Dictionary
{
{ 'á', 'a' },{ 'à', 'a' },{ 'â', 'a' },{ 'ã', 'a' },{ 'ä', 'a' },
{ 'é', 'e' },{ 'è', 'e' },{ 'ê', 'e' },{ 'ë', 'e' },
{ 'í', 'i' },{ 'ì', 'i' },{ 'î', 'i' },{ 'ï', 'i' },
{ 'ó', 'o' },{ 'ò', 'o' },{ 'õ', 'o' },{ 'ö', 'o' },
{ 'ú', 'u' },{ 'ù', 'u' },{ 'û', 'u' },{ 'ü', 'u' },
{ 'ç', 'c' },{ 'ñ', 'n' }
};
static StringHelper()
{
foreach (var item in _specialChars.ToList())
{
_specialChars.Add(
item.Key.ToString().ToUpper()[0],
item.Value.ToString().ToUpper()[0]);
}
}
private static readonly Regex _spaceRegex = new Regex(@"\s", RegexOptions.Compiled);
private static readonly Regex _numericRegex = new Regex(@"\s", RegexOptions.Compiled);
public static string Normalize(this string str, CharType normaliar = CharType.All)
{
if (normaliar.HasFlag(CharType.Spaces))
{
str = _spaceRegex.Replace(str, string.Empty);
}
if (normaliar.HasFlag(CharType.Special))
{
str = _specialChars.ToList()
.Aggregate(str, (ac, item) => ac.Replace(item.Key, item.Value));
}
if (normaliar.HasFlag(CharType.Numerics))
{
str = _numericRegex.Replace(str, string.Empty);
}
if (normaliar.HasFlag(CharType.UpperCase))
{
str = str.ToLower();
}
return str;
}
}
public enum CharType
{
Spaces = 1,
Special = 2,
Numerics
Solution
private static readonly Regex _numericRegex = new Regex(@"\s", RegexOptions.Compiled);That should be
\d, not \s.Instead of
_specialChars.Add(
item.Key.ToString().ToUpper()[0],
item.Value.ToString().ToUpper()[0]);You can write
_specialChars.Add(
Char.ToUpperInvariant(item.Key),
Char.ToUpperInvariant(item.Value));I find it a bit cleaner, and there is less work involved.
If the text is potentially very long, you may be better off doing just one pass of the string. You would need to profile against the sort of data you have, but it could look something like this
var spaces = normaliar.HasFlag(CharType.Spaces);
var numerics = normaliar.HasFlag(CharType.Numerics);
var uppercase = normaliar.HasFlag(CharType.UpperCase);
var special = normaliar.HasFlag(CharType.Special);
var sb = new StringBuilder(str.Length);
for (var i = 0; i < str.Length; i++)
{
var c = str[i];
if (spaces && Char.IsWhiteSpace(c))
{
continue;
}
if (numerics && Char.IsNumber(c))
{
continue;
}
if (uppercase)
{
c = Char.ToLowerInvariant(c);
}
if (special)
{
char replacement;
if (_specialChars.TryGetValue(c, out replacement))
{
c = replacement;
}
}
sb.Append(c);
}
return sb.ToString();Code Snippets
private static readonly Regex _numericRegex = new Regex(@"\s", RegexOptions.Compiled);_specialChars.Add(
item.Key.ToString().ToUpper()[0],
item.Value.ToString().ToUpper()[0]);_specialChars.Add(
Char.ToUpperInvariant(item.Key),
Char.ToUpperInvariant(item.Value));var spaces = normaliar.HasFlag(CharType.Spaces);
var numerics = normaliar.HasFlag(CharType.Numerics);
var uppercase = normaliar.HasFlag(CharType.UpperCase);
var special = normaliar.HasFlag(CharType.Special);
var sb = new StringBuilder(str.Length);
for (var i = 0; i < str.Length; i++)
{
var c = str[i];
if (spaces && Char.IsWhiteSpace(c))
{
continue;
}
if (numerics && Char.IsNumber(c))
{
continue;
}
if (uppercase)
{
c = Char.ToLowerInvariant(c);
}
if (special)
{
char replacement;
if (_specialChars.TryGetValue(c, out replacement))
{
c = replacement;
}
}
sb.Append(c);
}
return sb.ToString();Context
StackExchange Code Review Q#79037, answer score: 4
Revisions (0)
No revisions yet.