patternpythonModerate
Adding data from multiple Excel files to two lists
Viewed 0 times
exceladdingtwofilesmultiplelistsfromdata
Problem
I have 5 nested
for loops below, to add rows of data from multiple files to one of two lists. Is there a more pythonic way of doing this? I've come across an iterator-generator method named iteritems() -- could this be used to make this code more pythonic? # 5 nested loops
for root,dirs,files in os.walk(src):
files = [ _ for _ in files if _.endswith('.xlsx') ]
for file in files:
wb = xlrd.open_workbook(os.path.join(root,file))
worksheets = wb.sheet_names()
for worksheet_name in worksheets:
if worksheet_name.rfind('7600') != -1 :
sheet = wb.sheet_by_name(worksheet_name)
keys = [sheet.cell(3, col_index).value for col_index in xrange(sheet.ncols)]
for row_index in xrange(4, sheet.nrows):
d = {keys[col_index]: sheet.cell(row_index, col_index).value
for col_index in xrange(sheet.ncols)}
if file.rfind('oam') != -1 :
list_7600EoX.append(d)
else:
list_7600EoX_OAM.append(d)Solution
Your problem is not with 5 (or more) loops. Your problem is that you mixing code of different nature: walking over filesystem and name matching, and processing of files into single chunk of code.
Separate it into different functions calling each other:
Another option is to use generators to hide some details on how iteration is performed. For example, instead of walking over filesystem, let generator yield workbooks:
Separate it into different functions calling each other:
def process_worksheet(wb, worksheet_name):
sheet = wb.sheet_by_name(worksheet_name)
# ...
def process_xslx(path):
wb = xlrd.open_workbook(path)
worksheets = wb.sheet_names()
for worksheet_name in worksheets:
if worksheet_name.rfind('7600') != -1 :
process_worksheet(wb, worksheet_name)
for root,dirs,files in os.walk(src):
files = [ _ for _ in files if _.endswith('.xlsx') ]
for file in files:
process_xslx(os.path.join(root, file))Another option is to use generators to hide some details on how iteration is performed. For example, instead of walking over filesystem, let generator yield workbooks:
def walk_xlsx(src):
for root,dirs,files in os.walk(src):
files = [ _ for _ in files if _.endswith('.xlsx') ]
for file in files:
wb = xlrd.open_workbook(os.path.join(root, file))
yield wb
for wb in walk_xlsx(src):
# filter() is also a generator which yields only
# worksheet names that have '7600' in their names
worksheets = filter(lambda wn: '7600' in wn, wb.sheet_names())
for worksheet_name in worksheets:
# ...Code Snippets
def process_worksheet(wb, worksheet_name):
sheet = wb.sheet_by_name(worksheet_name)
# ...
def process_xslx(path):
wb = xlrd.open_workbook(path)
worksheets = wb.sheet_names()
for worksheet_name in worksheets:
if worksheet_name.rfind('7600') != -1 :
process_worksheet(wb, worksheet_name)
for root,dirs,files in os.walk(src):
files = [ _ for _ in files if _.endswith('.xlsx') ]
for file in files:
process_xslx(os.path.join(root, file))def walk_xlsx(src):
for root,dirs,files in os.walk(src):
files = [ _ for _ in files if _.endswith('.xlsx') ]
for file in files:
wb = xlrd.open_workbook(os.path.join(root, file))
yield wb
for wb in walk_xlsx(src):
# filter() is also a generator which yields only
# worksheet names that have '7600' in their names
worksheets = filter(lambda wn: '7600' in wn, wb.sheet_names())
for worksheet_name in worksheets:
# ...Context
StackExchange Code Review Q#91769, answer score: 12
Revisions (0)
No revisions yet.