patternpythonMinor
Gmail Data Analysis
Viewed 0 times
analysisdatagmail
Problem
I am using Imaplib to fetch data from Gmail and get the number of emails each day. I am going to login using Imaplib twice for fetching emails for the inbox well as for the outbox (sent mail). Is it possible that I have to login only once and get result as I need?
The code works fine and I am asking for review that how I can do more optimizations with this approach.
```
from imaplib import IMAP4_SSL
from datetime import date, timedelta, datetime
from time import mktime
from email.utils import parsedate
import email
import pygal
address = ''
password = ''
inbox = 'INBOX'
outbox = '[Gmail]/Sent Mail'
d = 6
def inbox_week(address,password,inbox,d):
Monday_Tuple = ('M','o','n')
Monday_List = []
Tuesday_Tuple = ('T','u','e')
Tuesday_List = []
Wednesday_Tuple = ('W','e','d')
Wednesday_List = []
Thursday_Tuple = ('T','h','u')
Thursday_List = []
Friday_Tuple = ('F','r','i')
Friday_List = []
Saturday_Tuple = ('S','a','t')
Saturday_List = []
Sunday_Tuple = ('S','u','n')
Sunday_List = []
mail = IMAP4_SSL('imap.gmail.com')
mail.login(address, password)
mail.select(inbox)
interval = (date.today()-timedelta(d)).strftime("%d-%b-%Y")
result, data = mail.uid('search',None,'(SENTSINCE{date})'.format(date=interval))
for num in data[0].split():
result, data = mail.uid('fetch',num,'(RFC822)')
msg = email.message_from_string(data[0][1])
msg['Date']
main_date = msg['Date']
Date_Tuple = main_date[0],main_date[1],main_date[2]
if (Date_Tuple==Monday_Tuple):
Monday_List.append(Monday_Tuple)
if (Date_Tuple == Tuesday_Tuple):
Tuesday_List.append(Tuesday_Tuple)
if (Date_Tuple == Wednesday_Tuple):
Wednesday_List.append(Wednesday_Tuple)
if (Date_Tuple == Thursday_Tuple):
Thursday_List.append(Thursday_Tuple)
if (Date_Tuple == Friday_Tuple):
The code works fine and I am asking for review that how I can do more optimizations with this approach.
```
from imaplib import IMAP4_SSL
from datetime import date, timedelta, datetime
from time import mktime
from email.utils import parsedate
import email
import pygal
address = ''
password = ''
inbox = 'INBOX'
outbox = '[Gmail]/Sent Mail'
d = 6
def inbox_week(address,password,inbox,d):
Monday_Tuple = ('M','o','n')
Monday_List = []
Tuesday_Tuple = ('T','u','e')
Tuesday_List = []
Wednesday_Tuple = ('W','e','d')
Wednesday_List = []
Thursday_Tuple = ('T','h','u')
Thursday_List = []
Friday_Tuple = ('F','r','i')
Friday_List = []
Saturday_Tuple = ('S','a','t')
Saturday_List = []
Sunday_Tuple = ('S','u','n')
Sunday_List = []
mail = IMAP4_SSL('imap.gmail.com')
mail.login(address, password)
mail.select(inbox)
interval = (date.today()-timedelta(d)).strftime("%d-%b-%Y")
result, data = mail.uid('search',None,'(SENTSINCE{date})'.format(date=interval))
for num in data[0].split():
result, data = mail.uid('fetch',num,'(RFC822)')
msg = email.message_from_string(data[0][1])
msg['Date']
main_date = msg['Date']
Date_Tuple = main_date[0],main_date[1],main_date[2]
if (Date_Tuple==Monday_Tuple):
Monday_List.append(Monday_Tuple)
if (Date_Tuple == Tuesday_Tuple):
Tuesday_List.append(Tuesday_Tuple)
if (Date_Tuple == Wednesday_Tuple):
Wednesday_List.append(Wednesday_Tuple)
if (Date_Tuple == Thursday_Tuple):
Thursday_List.append(Thursday_Tuple)
if (Date_Tuple == Friday_Tuple):
Solution
Firstly, I am going to make numerous references to Python's style guide. If you've adopted a different guide, you should mention it in the question; if you haven't adopted any, adopt this one. Following a consistent, conventional coding guide makes your code much easier for others to read.
Your imports are in the wrong order. This may seem like a minor thing, but it's useful to at least clarify visually which are third-party libraries, and alphabetical order makes it easier to scan for a given library:
I've also removed several you aren't actually using.
Constants should be named in
What exactly does:
mean? I cannot figure out why you're using it at all, and note that you replace it with whatever
At the very least add a comment explaining why
The whole
Also, a minor style point, but the parentheses in e.g.
Rather than create multiple separate variables, and making pointless lists of tuples, why not a list
In another breach of DRYness, note that your two functions are identical, except that they access different mailboxen. Why not make a single function, and specify the mailbox to use? In fact, I note that
Finally, the chart-writing bit seems OK, but I would be inclined to wrap that in a function too. You should generally have as little code as possible running at the top level of your script, preferring instead to define a specific entry point (conventionally called
This makes it easier to
Here's an alternative, making the changes I've suggested.
Note: I haven't been able to test it, but it should do exactly what the original did.
```
from datetime import date, timedelta
import email
from imaplib import IMAP4_SSL
import pygal
ADDRESS = '...'
PASSWORD = '...'
SERVER = 'imap.gmail.com'
INBOX = 'INBOX'
OUTBOX = '[Gmail]/Sent Mail'
DAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
def access_mail_server(server, address, password):
"""Connect to the specified mail server and log in."""
mail = IMAP4_SSL(server)
mail.login(address, password)
return mail
def list_mailbox_content(mail, since):
"""Get a list of IDs for all mail in the current mailbox."""
_, data = mail.uid('search', None, '(SENTSINCE {})'.format(since))
return data[0].split()
def fetch_message(mail, num):
"""Fetch a message from the current mailbox."""
_, data = mail.uid('fetch', num, '(RFC822)')
return email.message_from_string(data[0][1])
def one_week_ago():
"""Get the date from one week ago in the required format."""
return (date.today()-timedelta(days=6)).strftime("%d-%b-%Y")
def mailbox_counts(mail, mailbox):
"""Get the counts of emails by day over the last week."""
counts = [0 for _ in DAYS]
mail.select(mailbox)
for id_ in list_mailbox_content(mail, since=one_week_ago()):
msg = fetch_message(mail, id_)
counts[DAYS.index(msg['Date'][:3])] += 1
return counts
def graph_activity(mail, mailboxes):
"""Graph the last week's mail activity."""
bar_chart = pygal.Bar()
bar_chart.title = 'Weekly Email Analysis'
bar_chart.x_labels = DAYS
Your imports are in the wrong order. This may seem like a minor thing, but it's useful to at least clarify visually which are third-party libraries, and alphabetical order makes it easier to scan for a given library:
from datetime import date, timedelta
import email
from imaplib import IMAP4_SSL
import pygalI've also removed several you aren't actually using.
Constants should be named in
UPPERCASE_WITH_UNDERSCORES:ADDRESS = ''
PASSWORD = ''
...What exactly does:
d = 6mean? I cannot figure out why you're using it at all, and note that you replace it with whatever
inbox_week returns anyway, so by the time you call outbox_week you're actually passing Thursday's inbox count.At the very least add a comment explaining why
d is 6, or better yet use a name that makes it clear. In this case, it looks like it's supposed to be the number of days, so DAYS = 6 would be better.The whole
_Tuple, _List thing is completely bewildering, not least because the tuples are constants (again, UPPERCASE_WITH_UNDERSCORES, please) and should therefore be declared once, rather than redefined every time either function (for, in defiance of the Redundancy Department of Redundancy, you've repeated this code twice) gets called. I don't understand:- why you're creating the lists and the tuples at all, when you don't seem to care about building a list of emails (in fact, you put the tuple, rather than the email, into the list!)
- why you would create a tuple
('M', 'o', 'n')to compare to the first three characters of the date, rather than just comparing the stringmain_date[:3] == 'Mon'; and
- why you don't use
elif, even though there's no way more than one comparison can be true so you're just wasting cycles testing them.
Also, a minor style point, but the parentheses in e.g.
if (Date_Tuple == Tuesday_Tuple): are redundant.Rather than create multiple separate variables, and making pointless lists of tuples, why not a list
[mon_count, tue_count, ..., sun_count], or a dictionary {'Mon': mon_count, ...}? You could even use a collections.Counter, which will do most of the work for you.In another breach of DRYness, note that your two functions are identical, except that they access different mailboxen. Why not make a single function, and specify the mailbox to use? In fact, I note that
inbox and outbox are parameters to the function, despite being global constants, which doesn't make much sense. I would split this into more, smaller functions, each with an explanatory docstring.Finally, the chart-writing bit seems OK, but I would be inclined to wrap that in a function too. You should generally have as little code as possible running at the top level of your script, preferring instead to define a specific entry point (conventionally called
main) and calling it when the script is run directly with:if __name__ == '__main__':
main()This makes it easier to
import your functionality elsewhere later, without running it unnecessarily.Here's an alternative, making the changes I've suggested.
Note: I haven't been able to test it, but it should do exactly what the original did.
```
from datetime import date, timedelta
import email
from imaplib import IMAP4_SSL
import pygal
ADDRESS = '...'
PASSWORD = '...'
SERVER = 'imap.gmail.com'
INBOX = 'INBOX'
OUTBOX = '[Gmail]/Sent Mail'
DAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
def access_mail_server(server, address, password):
"""Connect to the specified mail server and log in."""
mail = IMAP4_SSL(server)
mail.login(address, password)
return mail
def list_mailbox_content(mail, since):
"""Get a list of IDs for all mail in the current mailbox."""
_, data = mail.uid('search', None, '(SENTSINCE {})'.format(since))
return data[0].split()
def fetch_message(mail, num):
"""Fetch a message from the current mailbox."""
_, data = mail.uid('fetch', num, '(RFC822)')
return email.message_from_string(data[0][1])
def one_week_ago():
"""Get the date from one week ago in the required format."""
return (date.today()-timedelta(days=6)).strftime("%d-%b-%Y")
def mailbox_counts(mail, mailbox):
"""Get the counts of emails by day over the last week."""
counts = [0 for _ in DAYS]
mail.select(mailbox)
for id_ in list_mailbox_content(mail, since=one_week_ago()):
msg = fetch_message(mail, id_)
counts[DAYS.index(msg['Date'][:3])] += 1
return counts
def graph_activity(mail, mailboxes):
"""Graph the last week's mail activity."""
bar_chart = pygal.Bar()
bar_chart.title = 'Weekly Email Analysis'
bar_chart.x_labels = DAYS
Code Snippets
from datetime import date, timedelta
import email
from imaplib import IMAP4_SSL
import pygalADDRESS = '<Your Email>'
PASSWORD = '<password>'
...if __name__ == '__main__':
main()from datetime import date, timedelta
import email
from imaplib import IMAP4_SSL
import pygal
ADDRESS = '...'
PASSWORD = '...'
SERVER = 'imap.gmail.com'
INBOX = 'INBOX'
OUTBOX = '[Gmail]/Sent Mail'
DAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
def access_mail_server(server, address, password):
"""Connect to the specified mail server and log in."""
mail = IMAP4_SSL(server)
mail.login(address, password)
return mail
def list_mailbox_content(mail, since):
"""Get a list of IDs for all mail in the current mailbox."""
_, data = mail.uid('search', None, '(SENTSINCE {})'.format(since))
return data[0].split()
def fetch_message(mail, num):
"""Fetch a message from the current mailbox."""
_, data = mail.uid('fetch', num, '(RFC822)')
return email.message_from_string(data[0][1])
def one_week_ago():
"""Get the date from one week ago in the required format."""
return (date.today()-timedelta(days=6)).strftime("%d-%b-%Y")
def mailbox_counts(mail, mailbox):
"""Get the counts of emails by day over the last week."""
counts = [0 for _ in DAYS]
mail.select(mailbox)
for id_ in list_mailbox_content(mail, since=one_week_ago()):
msg = fetch_message(mail, id_)
counts[DAYS.index(msg['Date'][:3])] += 1
return counts
def graph_activity(mail, mailboxes):
"""Graph the last week's mail activity."""
bar_chart = pygal.Bar()
bar_chart.title = 'Weekly Email Analysis'
bar_chart.x_labels = DAYS
for title, mailbox in mailboxes:
bar_chart.add(title, mailbox_counts(mail, mailbox))
bar_chart.render_to_file('honey4.svg')
if __name__ == '__main__':
graph_activity(
access_mail_server(SERVER, ADDRESS, PASSWORD),
[('Received', INBOX), ('Sent', OUTBOX)],
)Context
StackExchange Code Review Q#95456, answer score: 5
Revisions (0)
No revisions yet.