HiveBrain v1.2.0
Get Started
← Back to all entries
patterncsharpModerate

Reading from text file with RegexMatch

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
readingfileregexmatchwithtextfrom

Problem

Below is the method that I have written for reading from a text file. While reading, I need to match a line string to a given regex, and if it matches, I need to add the line string to a collection.

private static void GetOrigionalRGBColours(string txtFile)
{
    string tempLineValue;
    Regex regex = new Regex(@"^\d+.?\d* \d+.?\d* \d+.?\d* SRGB$");

    using (StreamReader inputReader = new StreamReader(txtFile))
    {
        while (null != (tempLineValue = inputReader.ReadLine()))                     
        {
            if (regex.Match(tempLineValue).Success
                && tempLineValue != "1 1 1 SRGB"
                && tempLineValue != "0 0 0 SRGB")
            {
                string[] rgbArray = tempLineValue.Split(' ');
                RGBColour rgbColour = new RGBColour() { Red = Convert.ToDecimal(rgbArray[0]), Green = Convert.ToDecimal(rgbArray[1]), Blue = Convert.ToDecimal(rgbArray[2]) };
                originalColourList.Add(rgbColour);
            }
        }
    }
}


When this method is run for a text file of 4MB having 28653 lines, it takes around 3 minutes just to finish the above method. Also, as a result of the above run, originalColourList is populated with 582 items.

How can I improve the performance of this method? I had truncated the original text file for testing purpose. The actually text file size may go up to 60MB.

Solution

One option is to not perform any regular expression check at all. You can perform a quick match by checking if the line ends with SRGB and the try the split and convert and simply continue if it fails. Something along these lines:

private static void GetOrigionalRGBColours(string txtFile)
{
    using (StreamReader inputReader = new StreamReader(txtFile))
    {
        while (!inputReader.EndOfStream)                     
        {
            var line = inputReader.ReadLine();

            if (line.EndsWith(" SRGB")
                && line != "1 1 1 SRGB"
                && line != "0 0 0 SRGB")
            {
                try 
                {
                    string[] rgbArray = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    if (rgbArray.Length == 4)
                    {
                        RGBColour rgbColour = new RGBColour() { Red = Convert.ToDecimal(rgbArray[0]), Green = Convert.ToDecimal(rgbArray[1]), Blue = Convert.ToDecimal(rgbArray[2]) };
                        originalColourList.Add(rgbColour);
                    }
                }
                catch (FormatException) {}
                catch (OverflowException) {}
            }
        }
    }
}


Alternatively to the try-catch you can use decimal.TryParse() and ignore the line if it returns false for any of the 3 entries.

Update: Version with decimal.TryParse.

private static void GetOrigionalRGBColours(string txtFile)
{
    using (StreamReader inputReader = new StreamReader(txtFile))
    {
        while (!inputReader.EndOfStream)                     
        {
            var line = inputReader.ReadLine();

            if (line.EndsWith(" SRGB")
                && line != "1 1 1 SRGB"
                && line != "0 0 0 SRGB")
            {
                string[] rgbArray = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                if (rgbArray.Length != 4) { continue; }

                decimal red;
                var hasRed = decimal.TryParse(rgbArray[0], out red);
                decimal green;
                var hasGreen = decimal.TryParse(rgbArray[1], out green);
                decimal blue;
                var hasBlue = decimal.TryParse(rgbArray[2], out blue);

                if (hasRed && hasGreen && hasBlue)
                {
                    RGBColour rgbColour = new RGBColour() { Red = red, Green = green, Blue = blue };
                    originalColourList.Add(rgbColour);
                }
            }
        }
    }
}


I like that one better because I find try {} catch {} block always a bit ugly . It also conveys the intend of the code better.

Code Snippets

private static void GetOrigionalRGBColours(string txtFile)
{
    using (StreamReader inputReader = new StreamReader(txtFile))
    {
        while (!inputReader.EndOfStream)                     
        {
            var line = inputReader.ReadLine();

            if (line.EndsWith(" SRGB")
                && line != "1 1 1 SRGB"
                && line != "0 0 0 SRGB")
            {
                try 
                {
                    string[] rgbArray = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    if (rgbArray.Length == 4)
                    {
                        RGBColour rgbColour = new RGBColour() { Red = Convert.ToDecimal(rgbArray[0]), Green = Convert.ToDecimal(rgbArray[1]), Blue = Convert.ToDecimal(rgbArray[2]) };
                        originalColourList.Add(rgbColour);
                    }
                }
                catch (FormatException) {}
                catch (OverflowException) {}
            }
        }
    }
}
private static void GetOrigionalRGBColours(string txtFile)
{
    using (StreamReader inputReader = new StreamReader(txtFile))
    {
        while (!inputReader.EndOfStream)                     
        {
            var line = inputReader.ReadLine();

            if (line.EndsWith(" SRGB")
                && line != "1 1 1 SRGB"
                && line != "0 0 0 SRGB")
            {
                string[] rgbArray = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                if (rgbArray.Length != 4) { continue; }

                decimal red;
                var hasRed = decimal.TryParse(rgbArray[0], out red);
                decimal green;
                var hasGreen = decimal.TryParse(rgbArray[1], out green);
                decimal blue;
                var hasBlue = decimal.TryParse(rgbArray[2], out blue);

                if (hasRed && hasGreen && hasBlue)
                {
                    RGBColour rgbColour = new RGBColour() { Red = red, Green = green, Blue = blue };
                    originalColourList.Add(rgbColour);
                }
            }
        }
    }
}

Context

StackExchange Code Review Q#40423, answer score: 12

Revisions (0)

No revisions yet.