patternpythonMinor
Recursively listing files in Python
Viewed 0 times
listingfilespythonrecursively
Problem
I'm a complete amateur at python. Other than hello world programs, this is the first thing I've ever done in Python.
I cannot find a good way to make os.walk function the way I want it to so I worked on my own function for it.
I'm curious which would be the best way to do something.
or
Basically of those, is checking the if once, and traversing the list twice best or is traversing the list once, but checking the if on each entry best?
I cannot find a good way to make os.walk function the way I want it to so I worked on my own function for it.
I'm curious which would be the best way to do something.
import os
def atDirList(startDir, maxDepth=0, minDepth=0, curDepth=0):
    """Return the files found under ``startDir`` within a depth window.

    Files directly inside ``startDir`` are at depth ``curDepth``; every level
    of sub-directory adds one.

    Args:
        startDir: Directory whose entries are listed.
        maxDepth: Deepest level that is descended into (inclusive).
        minDepth: Shallowest level whose files are reported (inclusive).
        curDepth: Depth assigned to ``startDir``; advanced by the recursion.

    Returns:
        A list of file paths. The files of a directory come before the files
        of its sub-directories.

    Raises:
        OSError: If ``os.listdir`` cannot read a directory.
    """
    output = []
    entries = [os.path.join(startDir, item) for item in os.listdir(startDir)]
    if curDepth >= minDepth:
        output.extend(path for path in entries if os.path.isfile(path))
    if curDepth + 1 <= maxDepth:
        for path in entries:
            if os.path.isdir(path):
                # extend() grows the list in place; ``output = output + ...``
                # copied the whole accumulated list for every sub-directory.
                output.extend(atDirList(path, maxDepth, minDepth, curDepth + 1))
    return output
print(atDirList('/music/main'))
or
import os
def atDirList(startDir, maxDepth=0, minDepth=0, curDepth=0):
    """Return the files found under ``startDir`` within a depth window.

    Single-pass variant: every directory entry is classified once. Files
    directly inside ``startDir`` are at depth ``curDepth``; every level of
    sub-directory adds one.

    Args:
        startDir: Directory whose entries are listed.
        maxDepth: Deepest level that is descended into (inclusive).
        minDepth: Shallowest level whose files are reported (inclusive).
        curDepth: Depth assigned to ``startDir``; advanced by the recursion.

    Returns:
        A list of file paths, in the order the entries are visited.

    Raises:
        OSError: If ``os.listdir`` cannot read a directory.
    """
    output = []
    # The loop is deliberately NOT wrapped in ``if curDepth >= minDepth``:
    # directories shallower than minDepth still have to be walked to reach
    # the wanted levels. Only the reporting of files is depth-filtered.
    for item in os.listdir(startDir):
        fullItem = os.path.join(startDir, item)
        if os.path.isfile(fullItem) and curDepth >= minDepth:
            output.append(fullItem)
        elif os.path.isdir(fullItem) and curDepth + 1 <= maxDepth:
            output.extend(atDirList(fullItem, maxDepth, minDepth, curDepth + 1))
    return output
print(atDirList('/music/main'))
Basically of those, is checking the if once, and traversing the list twice best or is traversing the list once, but checking the if on each entry best?
Solution
In Python you usually want to avoid having a recursive solution, because of the maximum recursion depth. Here this will probably not be a problem, because it is 1000 by default (so unless your directory hierarchy is 1000 levels deep, you are fine).
However, I would at least make your functions generators. They have the advantage that you don't need to generate the whole list in one go, but one element at a time. For this, Python 3 has even an additional nice feature,
So, your second function would become:
Note that I also added a
As an alternative solution, I would propose to use
Here I changed the parameter names to conform to Python's official style-guide, PEP8, by using
The
An iterable (like a list or a string) multiplied with an integer is just the iterable repeated n times:
A
Putting this together, this just joins the base path with
Possible bug:
After having written this alternative implementation I noticed some strange behavior of your (second, I did not check the first) function. When
This is what I would write with the additional constraints of having to return a list and possibly have a simple pattern (simple enough to be expressed with wildcard characters or an additional filter function):
However, I would at least make your functions generators. They have the advantage that you don't need to generate the whole list in one go, but one element at a time. For this, Python 3 has even an additional nice feature,
yield from. Note that you will have to call list on the result if you need the whole list at once (which negates any positive effect of using a generator), but that you can directly iterate over the output of a generator.
So, your second function would become:
def atDirList(startDir, maxDepth=0, minDepth=0, curDepth=0):
    """Lazily yield the files found under ``startDir`` within a depth window.

    Files directly inside ``startDir`` are at depth ``curDepth``; every level
    of sub-directory adds one. Wrap the call in ``list(...)`` when the whole
    result is needed at once.

    Args:
        startDir: Directory whose entries are listed.
        maxDepth: Deepest level that is descended into (inclusive).
        minDepth: Shallowest level whose files are reported (inclusive).
        curDepth: Depth assigned to ``startDir``; advanced by the recursion.

    Yields:
        File paths, in the order the entries are visited.

    Raises:
        OSError: If ``startDir`` itself cannot be listed (raised on first
            iteration, because this is a generator).
    """
    # The loop is deliberately NOT wrapped in ``if curDepth >= minDepth``:
    # directories shallower than minDepth still have to be walked to reach
    # the wanted levels. Only the reporting of files is depth-filtered.
    for item in os.listdir(startDir):
        fullItem = os.path.join(startDir, item)
        try:
            if os.path.isfile(fullItem) and curDepth >= minDepth:
                yield fullItem
            elif os.path.isdir(fullItem) and curDepth + 1 <= maxDepth:
                yield from atDirList(fullItem, maxDepth, minDepth, curDepth + 1)
        except OSError:
            # os.listdir in the recursive call raises for a sub-directory
            # that cannot be read; skip that entry and carry on.
            continue
# Note that I also added a
try..except block to skip over files for which you don't have enough permissions to check if they are a file or directory.
As an alternative solution, I would propose to use
glob, which basically shell expands a string like "/home/graipher/*/*" to a list of all files and directories matching this pattern. This has two caveats: first, we still need to filter out the files from the directories (easily achieved with filter), and second, this ignores files which start with a . (hidden files).
import glob
import os
def glob_list(start, max_depth=0, min_depth=0):
    """Yield the files below ``start`` whose depth is in the given window.

    Files directly inside ``start`` are at depth 0. One glob pattern is
    expanded per depth level, so results arrive grouped by depth.

    Args:
        start: Base directory; taken literally, never as a pattern.
        max_depth: Deepest level to report (inclusive).
        min_depth: Shallowest level to report (inclusive).

    Yields:
        Paths of regular files. Names starting with "." are not matched by
        the "*" wildcard and are therefore skipped.
    """
    # Escape the base so that "[", "*" or "?" in a real directory name is
    # matched literally instead of being read as a wildcard.
    current_dir = os.path.join(glob.escape(start), *"*" * min_depth)
    for _ in range(min_depth, max_depth + 1):
        # One more "*" component means one level deeper.
        current_dir = os.path.join(current_dir, "*")
        yield from filter(os.path.isfile, glob.iglob(current_dir))
if __name__ == "__main__":
for file_name in glob_list(os.path.expanduser("~"), max_depth=2, min_depth=1):
print(file_name)
Here I changed the parameter names to conform to Python's official style-guide, PEP8, by using
lower_case instead of camelCase.
The *"*" * min_depth part is probably slightly complicated, because of all the stars. Let's break it down:
An iterable (like a list or a string) multiplied with an integer is just the iterable repeated n times:
>>> "a" * 3
"aaa"
>>> min_depth = 2
>>> "*" * min_depth
"**"A
* can be used to unpack an iterable (such as a tuple or a string) into function arguments, like in this function that takes an arbitrary number of parameters:
>>> def f(*args):
... print(args)
...
>>> f(*"aaa")
('a', 'a', 'a')
>>> f(*"**")
('*', '*')
Putting this together, this just joins the base path with
min_depth levels of stars (the loop then appends one more level on each iteration):
>>> os.path.join("/home/graipher", *"*" * min_depth)
'/home/graipher/*/*'
Possible bug:
After having written this alternative implementation I noticed some strange behavior of your (second, I did not check the first) function. When
minDepth is greater than zero, you would not expect any files from the base directory. However, they are still included (note that you need to start with curDepth = minDepth, otherwise your script will never run).
This is what I would write with the additional constraints of having to return a list and possibly have a simple pattern (simple enough to be expressed with wildcard characters or an additional filter function):
def glob_list(start, max_depth=0, min_depth=0, pattern="*", func=os.path.isfile):
    """Return the paths below ``start`` that match ``pattern`` and ``func``.

    Entries directly inside ``start`` are at depth 0. One glob pattern is
    expanded per depth level, so results are grouped by depth.

    Args:
        start: Base directory; taken literally, never as a pattern.
        max_depth: Deepest level to report (inclusive).
        min_depth: Shallowest level to report (inclusive).
        pattern: Wildcard pattern applied to the final path component.
        func: Predicate called with each matched path; only paths for which
            it returns a true value are kept. Defaults to ``os.path.isfile``.

    Returns:
        A list of matching paths.
    """
    # Escape the base so that "[", "*" or "?" in a real directory name is
    # matched literally instead of being read as a wildcard.
    base = glob.escape(start)
    output = []
    for depth in range(min_depth, max_depth + 1):
        # ``depth`` wildcard components select the level, ``pattern`` the name.
        path = os.path.join(base, *("*" * depth), pattern)
        output.extend(filter(func, glob.iglob(path)))
    return output
# Code Snippets
def atDirList(startDir, maxDepth=0, minDepth=0, curDepth=0):
    """Lazily yield the files found under ``startDir`` within a depth window.

    Files directly inside ``startDir`` are at depth ``curDepth``; every level
    of sub-directory adds one. Wrap the call in ``list(...)`` when the whole
    result is needed at once.

    Args:
        startDir: Directory whose entries are listed.
        maxDepth: Deepest level that is descended into (inclusive).
        minDepth: Shallowest level whose files are reported (inclusive).
        curDepth: Depth assigned to ``startDir``; advanced by the recursion.

    Yields:
        File paths, in the order the entries are visited.

    Raises:
        OSError: If ``startDir`` itself cannot be listed (raised on first
            iteration, because this is a generator).
    """
    # The loop is deliberately NOT wrapped in ``if curDepth >= minDepth``:
    # directories shallower than minDepth still have to be walked to reach
    # the wanted levels. Only the reporting of files is depth-filtered.
    for item in os.listdir(startDir):
        fullItem = os.path.join(startDir, item)
        try:
            if os.path.isfile(fullItem) and curDepth >= minDepth:
                yield fullItem
            elif os.path.isdir(fullItem) and curDepth + 1 <= maxDepth:
                yield from atDirList(fullItem, maxDepth, minDepth, curDepth + 1)
        except OSError:
            # os.listdir in the recursive call raises for a sub-directory
            # that cannot be read; skip that entry and carry on.
            continue


import glob
import os
def glob_list(start, max_depth=0, min_depth=0):
    """Yield the files below ``start`` whose depth is in the given window.

    Files directly inside ``start`` are at depth 0. One glob pattern is
    expanded per depth level, so results arrive grouped by depth.

    Args:
        start: Base directory; taken literally, never as a pattern.
        max_depth: Deepest level to report (inclusive).
        min_depth: Shallowest level to report (inclusive).

    Yields:
        Paths of regular files. Names starting with "." are not matched by
        the "*" wildcard and are therefore skipped.
    """
    # Escape the base so that "[", "*" or "?" in a real directory name is
    # matched literally instead of being read as a wildcard.
    current_dir = os.path.join(glob.escape(start), *"*" * min_depth)
    for _ in range(min_depth, max_depth + 1):
        # One more "*" component means one level deeper.
        current_dir = os.path.join(current_dir, "*")
        yield from filter(os.path.isfile, glob.iglob(current_dir))
if __name__ == "__main__":
for file_name in glob_list(os.path.expanduser("~"), max_depth=2, min_depth=1):
print(file_name)>>> "a" * 3
"aaa"
>>> min_depth = 2
>>> "*" * min_depth
"**">>> def f(*args):
... print(args)
...
>>> f(*"aaa")
('a', 'a', 'a')
>>> f(*"**")
('*', '*')>>> os.path.join("/home/graipher", *"*" * min_depth)
'/home/graipher/*/*'Context
StackExchange Code Review Q#161989, answer score: 3
Revisions (0)
No revisions yet.