HiveBrain v1.2.0
Get Started
← Back to all entries
patterncsharpMinor

Extracting a number from provided URLs inside a text file

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
numberfiletextprovidedextractinginsidefromurls

Problem

I need to extract an ID (an integer) from a URL.

Example: http://www.example.com/foo/bar/12345

For this I wrote four methods. The first one, ReadIDsFromFile(), is called by my constructor, and its return value is assigned to a property of this class. The methods are called in the order I posted them below.

/// <summary>
/// Reads the text file at <paramref name="path"/> line by line and collects the ID
/// extracted from every line that passes ValidateUrl.
/// </summary>
/// <param name="path">Fully qualified path to a text file, e.g. C:\text.txt.</param>
/// <returns>
/// The extracted IDs in file order. Lines that fail validation are logged and skipped.
/// </returns>
private List<string> ReadIDsFromFile(string path)
{
    // ExtractID returns a string, so the list holds strings.
    List<string> TweetIDsList = new List<string>();

    using (StreamReader sr = new StreamReader(path))
    {
        // ReadLine returns null once the end of the file is reached.
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            if (ValidateUrl(line))
            {
                TweetIDsList.Add(ExtractID(line));
            }
            else
            {
                Logger.Log("Invalid URL: {0}", line);
            }
        }
    }

    return TweetIDsList;
}

/// <summary>
/// Checks whether <paramref name="url"/> is an absolute URI that uses the http or https scheme.
/// </summary>
/// <param name="url">The text to check.</param>
/// <returns>
/// True only when the text parses as an absolute URI and its scheme is http or https.
/// </returns>
private bool ValidateUrl(string url)
{
    bool result = false;
    Uri parsedUri;

    // The scheme is only inspected once parsing has succeeded.
    if (Uri.TryCreate(url, UriKind.Absolute, out parsedUri))
    {
        result = parsedUri.Scheme == Uri.UriSchemeHttp || parsedUri.Scheme == Uri.UriSchemeHttps;
    }

    return result;
}

/// <summary>
/// Collects the run of decimal digits at the very end of <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL whose trailing number is wanted; must not be null.</param>
/// <returns>
/// The trailing digits in their original order, or an empty string when the last
/// character is not a digit.
/// </returns>
private string ExtractID(string url)
{
    string reversedDigits = string.Empty;
    int position = url.Length - 1;
    int digit;

    // Walk backwards from the last character and stop at the first one that does
    // not parse as a number (probably a forward slash).
    while (position >= 0 && Int32.TryParse(url[position].ToString(), out digit))
    {
        reversedDigits += digit;
        position--;
    }

    // The digits were gathered back to front, so flip them before returning.
    return ReverseNumber(reversedDigits);
}

/// <summary>
/// Returns the characters of <paramref name="id"/> in reverse order.
/// </summary>
/// <param name="id">The string to reverse; must not be null.</param>
/// <returns>A new string with the characters reversed; an empty string stays empty.</returns>
private string ReverseNumber(string id)
{
    // Reverse the character buffer in place rather than growing a string one
    // character at a time, which allocates a new string on every iteration.
    char[] characters = id.ToCharArray();
    Array.Reverse(characters);
    return new string(characters);
}


My code is working without problems so far but I feel like that it is overl

Solution

You can use a regular expression to find the ID. The pattern [0-9]+$ will match one or more occurrences of 0-9 at the end of the string. You can use it like this:

// Matches one or more of the digits 0-9 at the end of the input.
private static readonly Regex UrlId = new Regex("[0-9]+$");

/// <summary>
/// Returns the digits that <paramref name="url"/> ends with.
/// </summary>
/// <param name="url">The URL to search.</param>
/// <returns>The trailing digits, or an empty string when the URL does not end in a digit.</returns>
private static string ExtractID(string url)
{
    Match trailingDigits = UrlId.Match(url);
    if (!trailingDigits.Success)
    {
        return string.Empty;
    }

    return trailingDigits.Value;
}


Instead of using a StreamReader, consider using File.ReadLines:

// Handle the file one line at a time via File.ReadLines.
foreach (string candidate in File.ReadLines(path))
{
    // Lines that are not valid URLs are logged and skipped.
    if (!ValidateUrl(candidate))
    {
        Logger.Log("Invalid URL: {0}", candidate);
        continue;
    }

    TweetIDsList.Add(ExtractID(candidate));
}


You can remove the variable result from ValidateUrl:

/// <summary>
/// Checks whether <paramref name="url"/> is an absolute URI that uses the http or https scheme.
/// </summary>
/// <param name="url">The text to check.</param>
/// <returns>
/// True only when the text parses as an absolute URI and its scheme is http or https.
/// </returns>
private static bool ValidateUrl(string url)
{
    Uri parsedUri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out parsedUri))
    {
        return false;
    }

    string scheme = parsedUri.Scheme;
    return scheme == Uri.UriSchemeHttp || scheme == Uri.UriSchemeHttps;
}

Code Snippets

// Matches one or more of the digits 0-9 at the end of the input.
private static readonly Regex UrlId = new Regex("[0-9]+$");

/// <summary>
/// Returns the digits that <paramref name="url"/> ends with.
/// </summary>
/// <param name="url">The URL to search.</param>
/// <returns>The trailing digits, or an empty string when the URL does not end in a digit.</returns>
private static string ExtractID(string url)
{
    Match trailingDigits = UrlId.Match(url);
    if (!trailingDigits.Success)
    {
        return string.Empty;
    }

    return trailingDigits.Value;
}
// Handle the file one line at a time via File.ReadLines.
foreach (string candidate in File.ReadLines(path))
{
    // Lines that are not valid URLs are logged and skipped.
    if (!ValidateUrl(candidate))
    {
        Logger.Log("Invalid URL: {0}", candidate);
        continue;
    }

    TweetIDsList.Add(ExtractID(candidate));
}
/// <summary>
/// Checks whether <paramref name="url"/> is an absolute URI that uses the http or https scheme.
/// </summary>
/// <param name="url">The text to check.</param>
/// <returns>
/// True only when the text parses as an absolute URI and its scheme is http or https.
/// </returns>
private static bool ValidateUrl(string url)
{
    Uri parsedUri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out parsedUri))
    {
        return false;
    }

    string scheme = parsedUri.Scheme;
    return scheme == Uri.UriSchemeHttp || scheme == Uri.UriSchemeHttps;
}

Context

StackExchange Code Review Q#86088, answer score: 4

Revisions (0)

No revisions yet.