HiveBrain v1.2.0
Get Started
← Back to all entries
patterncsharpMinor

Script that parses the Chase.com page for my recent payments

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
scriptchasetherecentthatparsespageforcompayments

Problem

I wrote this code earlier to parse Chase.com's online transactions page. It's written in WinForms.

-
stepBtn is a button that starts this.

-
wb is a WebBrowser that is already navigated to the page.

Can you give me advice? I'm primarily looking for optimizations, but any other advice would be great.

private void stepBtn_Click(object sender, EventArgs e)
{
    List date = new List();
    List desc = new List();
    List amt = new List();
    //Parse for date
    var links = wb.Document.GetElementsByTagName("span");
    foreach (HtmlElement link in links)
    {
        if (link.InnerText != null)
        {
            string inner = link.InnerText.Trim();
            if (inner == "Pending" || Regex.IsMatch(inner, @"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$"))
            {
                date.Add(inner);
            }
        }
    }

//Parse for description
links = wb.Document.GetElementsByTagName("td");
foreach (HtmlElement link in links)
{
    if (link.GetAttribute("classname") == "cellStyle")
    {
        desc.Add(link.InnerText);
    }
}

//Parse for amount
//Assuming I never make a $1000+ purchase, every number  2 && link.InnerText != null && link.InnerText.Trim().Length >= 5)
    {
        string inner = link.InnerText.Trim();
        if (inner.Substring(0, 1) == "$" && Convert.ToDouble(inner.Substring(1)) < 1000)
        {
            amt.Add(Convert.ToDouble(inner.Substring(1)));
        }
    }
    times++;
}

//Check to make sure all lists have the same length
if (date.Count != amt.Count || amt.Count != desc.Count)
{
    MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
    return;
}

//Output the arrays to the first checklist
for (int i = 0; i < date.Count; i++)
{
    firstCheck.Items.Add(date[i] + ": " + desc[i] + " (" + amt[i] + ")");
}
}

Solution

Single responibility principle

You method violates the SRP because it does to many things.

  • it parses HtmlElements for 3 different types



  • it validates the resulting List



  • updates the UI



If you need comments like

//Parse for date
//Parse for description
//...


then it is a clear sign that these parts should be extracted to separate methods.

A much cleaner way would be

private const string dateRegexPattern = @"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$"
private List ParseForPurchaseDates(HtmlDocument document)
{
    var purchaseDates = new List();

    var links = document.GetElementsByTagName("span");

    foreach (HtmlElement link in links)
    {
        if (link.InnerText == null) { continue; }

        string inner = link.InnerText.Trim();
        if (inner == "Pending" || Regex.IsMatch(inner,dateRegexPattern ))
        {
            purchaseDates.Add(inner);
        }

    }
    return purchaseDates;
}


if you do this for the other parsings too, your click eventhandler could look like

private void stepBtn_Click(object sender, EventArgs e)
{
    List date = ParseForPurchaseDates(wb.Document);
    List desc = ParseForPurchaseDescriptions(wb.Document);
    List amt = ParseForPurchaseAmounts(wb.Document);

    if (ListCountIsValid(date, desc, amt))
    {
        // update the UI
        return;
    }

    MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
}

private bool ListCountIsValid(List purchaseDates, List purchaseDescriptions, List purchaseAmounts)
{
    return purchaseDates.Count == purchaseAmounts.Count  
        && purchaseDescriptions.Count == purchaseAmounts.Count;
}


For the parsing of the amount I would like to suggest to add a second parameter which should be optional to replace the hardcoded value 1000.

private List ParseForPurchaseAmounts(HtmlDocument document, double maxAmount = 1000d)
{
    //....
}


Naming

Naming is a very important part of programming. Using abbreviations instead of the complete word will reduce readability and makes it hard to maintain. So give Bob the maintainer a chance to do his/her job easy and fast by using proper names.

amt should be e.g amounts or purchaseAmounts ...

Code Snippets

//Parse for date
//Parse for description
//...
private const string dateRegexPattern = @"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$"
private List<string> ParseForPurchaseDates(HtmlDocument document)
{
    var purchaseDates = new List<string>();

    var links = document.GetElementsByTagName("span");

    foreach (HtmlElement link in links)
    {
        if (link.InnerText == null) { continue; }

        string inner = link.InnerText.Trim();
        if (inner == "Pending" || Regex.IsMatch(inner,dateRegexPattern ))
        {
            purchaseDates.Add(inner);
        }

    }
    return purchaseDates;
}
private void stepBtn_Click(object sender, EventArgs e)
{
    List<string> date = ParseForPurchaseDates(wb.Document);
    List<string> desc = ParseForPurchaseDescriptions(wb.Document);
    List<double> amt = ParseForPurchaseAmounts(wb.Document);

    if (ListCountIsValid(date, desc, amt))
    {
        // update the UI
        return;
    }

    MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
}

private bool ListCountIsValid(List<string> purchaseDates, List<string> purchaseDescriptions, List<double> purchaseAmounts)
{
    return purchaseDates.Count == purchaseAmounts.Count  
        && purchaseDescriptions.Count == purchaseAmounts.Count;
}
private List<double> ParseForPurchaseAmounts(HtmlDocument document, double maxAmount = 1000d)
{
    //....
}

Context

StackExchange Code Review Q#93173, answer score: 8

Revisions (0)

No revisions yet.