patterncsharpMinor
Extracting a number from provided URLs inside a text file
Viewed 0 times
numberfiletextprovidedextractinginsidefromurls
Problem
I need to extract an ID (an integer) from a URL.
Example:
For this I wrote 4 methods where the first one
My code is working without problems so far but I feel like that it is overl
Example:
http://www.example.com/foo/bar/12345
For this I wrote 4 methods, where the first one,
ReadIDsFromFile(), is called by my constructor and its return value is assigned to a property of this class. The methods are called in the order I posted them below.
private List ReadIDsFromFile(string path)
{
// path is the full qualified path to a txt file. C:\text.txt
List TweetIDsList = new List();
string temp = string.Empty;
using (StreamReader sr = new StreamReader(path))
{
while ((temp = sr.ReadLine()) != null)
{
if (ValidateUrl(temp))
{
TweetIDsList.Add(ExtractID(temp));
}
else
{
Logger.Log("Invalid URL: {0}", temp);
}
}
}
return TweetIDsList;
}
/// <summary>
/// Checks whether <paramref name="url"/> parses as an absolute URI that uses
/// the http or https scheme.
/// </summary>
/// <param name="url">The text to check.</param>
/// <returns>
/// <c>true</c> when <paramref name="url"/> is an absolute URI whose scheme is
/// http or https; otherwise <c>false</c>.
/// </returns>
private bool ValidateUrl(string url)
{
    Uri uriResult;

    // TryCreate reports malformed input through its return value, so the
    // scheme is only inspected once parsing has succeeded.
    return Uri.TryCreate(url, UriKind.Absolute, out uriResult)
        && (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
}
/// <summary>
/// Returns the run of ASCII digits (0-9) found at the very end of
/// <paramref name="url"/>.
/// </summary>
/// <param name="url">The URL whose trailing numeric ID should be extracted.</param>
/// <returns>
/// The trailing digits in their original order, or an empty string when the
/// last character of <paramref name="url"/> is not a digit (or the string is empty).
/// </returns>
private string ExtractID(string url)
{
    // Walk backwards from the end until the first non-digit character
    // (typically the forward slash preceding the ID) is found.
    int start = url.Length;
    while (start > 0 && url[start - 1] >= '0' && url[start - 1] <= '9')
    {
        start--;
    }

    // Taking the tail directly keeps the digits in order, so no reversal
    // step or per-character string building is needed.
    return url.Substring(start);
}
private string ReverseNumber(string id)
{
char[] tempArray = id.ToCharArray();
string result = string.Empty;
for (int i = tempArray.Length -1; i >= 0; i--)
{
result += tempArray[i];
}
return result;
}My code is working without problems so far but I feel like that it is overl
Solution
You can use a regular expression to find the ID. The pattern
Instead of using a
You can remove the variable
[0-9]+$ will match one or more occurrences of 0-9 at the end of the string. You can use it like this:private static readonly Regex UrlId = new Regex("[0-9]+$");
private static string ExtractID(string url)
{
var match = UrlId.Match(url);
return match.Success
? match.Captures[0].Value
: string.Empty;
}Instead of using a
StreamReader, consider using File.ReadLinesforeach (var line in File.ReadLines(path))
{
if (ValidateUrl(line))
{
TweetIDsList.Add(ExtractID(line));
}
else
{
Logger.Log("Invalid URL: {0}", line);
}
}You can remove the variable
result from ValidateUrl:private static bool ValidateUrl(string url)
{
Uri uriResult;
return Uri.TryCreate(url, UriKind.Absolute, out uriResult) &&
(uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
}Code Snippets
private static readonly Regex UrlId = new Regex("[0-9]+$");
private static string ExtractID(string url)
{
var match = UrlId.Match(url);
return match.Success
? match.Captures[0].Value
: string.Empty;
}foreach (var line in File.ReadLines(path))
{
if (ValidateUrl(line))
{
TweetIDsList.Add(ExtractID(line));
}
else
{
Logger.Log("Invalid URL: {0}", line);
}
}private static bool ValidateUrl(string url)
{
Uri uriResult;
return Uri.TryCreate(url, UriKind.Absolute, out uriResult) &&
(uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
}Context
StackExchange Code Review Q#86088, answer score: 4
Revisions (0)
No revisions yet.