patterncsharpMinor
Script that parses the Chase.com page for my recent payments
Viewed 0 times
scriptchasetherecentthatparsespageforcompayments
Problem
I wrote this code earlier to parse Chase.com's online transactions page. It's written in WinForms.
-
-
Can you give me advice? I'm primarily looking for optimizations, but any other advice would be great.
-
stepBtn is a button that starts this.-
wb is a WebBrowser that is already navigated to the page.Can you give me advice? I'm primarily looking for optimizations, but any other advice would be great.
private void stepBtn_Click(object sender, EventArgs e)
{
List date = new List();
List desc = new List();
List amt = new List();
//Parse for date
var links = wb.Document.GetElementsByTagName("span");
foreach (HtmlElement link in links)
{
if (link.InnerText != null)
{
string inner = link.InnerText.Trim();
if (inner == "Pending" || Regex.IsMatch(inner, @"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$"))
{
date.Add(inner);
}
}
}
//Parse for description
links = wb.Document.GetElementsByTagName("td");
foreach (HtmlElement link in links)
{
if (link.GetAttribute("classname") == "cellStyle")
{
desc.Add(link.InnerText);
}
}
//Parse for amount
//Assuming I never make a $1000+ purchase, every number 2 && link.InnerText != null && link.InnerText.Trim().Length >= 5)
{
string inner = link.InnerText.Trim();
if (inner.Substring(0, 1) == "$" && Convert.ToDouble(inner.Substring(1)) < 1000)
{
amt.Add(Convert.ToDouble(inner.Substring(1)));
}
}
times++;
}
//Check to make sure all lists have the same length
if (date.Count != amt.Count || amt.Count != desc.Count)
{
MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
return;
}
//Output the arrays to the first checklist
for (int i = 0; i < date.Count; i++)
{
firstCheck.Items.Add(date[i] + ": " + desc[i] + " (" + amt[i] + ")");
}
}Solution
Single responibility principle
You method violates the SRP because it does to many things.
If you need comments like
then it is a clear sign that these parts should be extracted to separate methods.
A much cleaner way would be
if you do this for the other parsings too, your click eventhandler could look like
For the parsing of the
Naming
Naming is a very important part of programming. Using abbreviations instead of the complete word will reduce readability and makes it hard to maintain. So give Bob the maintainer a chance to do his/her job easy and fast by using proper names.
You method violates the SRP because it does to many things.
- it parses
HtmlElementsfor 3 different types
- it validates the resulting
List
- updates the UI
If you need comments like
//Parse for date
//Parse for description
//...then it is a clear sign that these parts should be extracted to separate methods.
A much cleaner way would be
private const string dateRegexPattern = @"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$"
private List ParseForPurchaseDates(HtmlDocument document)
{
var purchaseDates = new List();
var links = document.GetElementsByTagName("span");
foreach (HtmlElement link in links)
{
if (link.InnerText == null) { continue; }
string inner = link.InnerText.Trim();
if (inner == "Pending" || Regex.IsMatch(inner,dateRegexPattern ))
{
purchaseDates.Add(inner);
}
}
return purchaseDates;
}if you do this for the other parsings too, your click eventhandler could look like
private void stepBtn_Click(object sender, EventArgs e)
{
List date = ParseForPurchaseDates(wb.Document);
List desc = ParseForPurchaseDescriptions(wb.Document);
List amt = ParseForPurchaseAmounts(wb.Document);
if (ListCountIsValid(date, desc, amt))
{
// update the UI
return;
}
MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
}
private bool ListCountIsValid(List purchaseDates, List purchaseDescriptions, List purchaseAmounts)
{
return purchaseDates.Count == purchaseAmounts.Count
&& purchaseDescriptions.Count == purchaseAmounts.Count;
}For the parsing of the
amount I would like to suggest to add a second parameter which should be optional to replace the hardcoded value 1000. private List ParseForPurchaseAmounts(HtmlDocument document, double maxAmount = 1000d)
{
//....
}Naming
Naming is a very important part of programming. Using abbreviations instead of the complete word will reduce readability and makes it hard to maintain. So give Bob the maintainer a chance to do his/her job easy and fast by using proper names.
amt should be e.g amounts or purchaseAmounts ...Code Snippets
//Parse for date
//Parse for description
//...private const string dateRegexPattern = @"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$"
private List<string> ParseForPurchaseDates(HtmlDocument document)
{
var purchaseDates = new List<string>();
var links = document.GetElementsByTagName("span");
foreach (HtmlElement link in links)
{
if (link.InnerText == null) { continue; }
string inner = link.InnerText.Trim();
if (inner == "Pending" || Regex.IsMatch(inner,dateRegexPattern ))
{
purchaseDates.Add(inner);
}
}
return purchaseDates;
}private void stepBtn_Click(object sender, EventArgs e)
{
List<string> date = ParseForPurchaseDates(wb.Document);
List<string> desc = ParseForPurchaseDescriptions(wb.Document);
List<double> amt = ParseForPurchaseAmounts(wb.Document);
if (ListCountIsValid(date, desc, amt))
{
// update the UI
return;
}
MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
}
private bool ListCountIsValid(List<string> purchaseDates, List<string> purchaseDescriptions, List<double> purchaseAmounts)
{
return purchaseDates.Count == purchaseAmounts.Count
&& purchaseDescriptions.Count == purchaseAmounts.Count;
}private List<double> ParseForPurchaseAmounts(HtmlDocument document, double maxAmount = 1000d)
{
//....
}Context
StackExchange Code Review Q#93173, answer score: 8
Revisions (0)
No revisions yet.