patternpythonMinor
Walk file system checking that backup files are present
Viewed 0 times
filepresentaresystemcheckingwalkthatfilesbackup
Problem
Every day I manually check that my systems were backed up. I use the Windows file system through the Explorer GUI and check that the backup files were modified on the previous day or later.
This script walks the file system backup directory and compares the files modified since the start of the previous day to a list I've prepared in the workfile.txt file.
There are a number of areas to address.
backproj.py
```
import datetime
import os
from os.path import join, getsize
from colorama import Fore, Back, Style
TODAY = datetime.date.today()
YESTERDAY = TODAY - datetime.timedelta(days=1)
# TODO: Move to config or CLI argument
working_directory = 'W:\\SQLServer_Backups_Recent'
def get_raw_names_from_file(file):
with open(file, 'r') as f: # TODO: Try/Catch if file doesn't exist?
return list(f)
def process_raw_file_names(raw_names):
for file in raw_names:
if ('') in file:
yest_file = file.replace(
'', '{d.month}-{d.day}-{d.year}-'.format(
d=YESTERDAY) + YESTERDAY.strftime('%a'))
else:
yest_file = file
if ('_' + str(YESTERDAY.year)) in yest_file:
nostamp_file = yest_file[:(yest_file.find(
'_' + str(YESTERDAY.year)))] + '\n'
else:
nostamp_file = yest_file
if ('') in nostamp_file:
clean_file = file.replace(
'', YESTERDAY.strftime('%Y-%b-%d'))
elif ('') in nostamp_file:
clean_
This script walks the file system backup directory and compares the files modified since the start of the previous day to a list I've prepared in the workfile.txt file.
There are a number of areas to address.
- When building the path\filename strings should I break out the 'ifs' into their own functions?
- When walking the file system I exclude directories by hard coding them in if statements. Should I check if a set of excluded directories is in the dirs list? I tried but I couldn't get it to work that way so I reverted to individual checks.
- Would it be better to parse the dates out of the path and file names rather than hard-coding the places to insert dates with strings like [yesterday]?
backproj.py
```
import datetime
import os
from os.path import join, getsize
from colorama import Fore, Back, Style
TODAY = datetime.date.today()
YESTERDAY = TODAY - datetime.timedelta(days=1)
# TODO: Move to config or CLI argument
working_directory = 'W:\\SQLServer_Backups_Recent'
def get_raw_names_from_file(file):
with open(file, 'r') as f: # TODO: Try/Catch if file doesn't exist?
return list(f)
def process_raw_file_names(raw_names):
for file in raw_names:
if ('') in file:
yest_file = file.replace(
'', '{d.month}-{d.day}-{d.year}-'.format(
d=YESTERDAY) + YESTERDAY.strftime('%a'))
else:
yest_file = file
if ('_' + str(YESTERDAY.year)) in yest_file:
nostamp_file = yest_file[:(yest_file.find(
'_' + str(YESTERDAY.year)))] + '\n'
else:
nostamp_file = yest_file
if ('') in nostamp_file:
clean_file = file.replace(
'', YESTERDAY.strftime('%Y-%b-%d'))
elif ('') in nostamp_file:
clean_
Solution
You can definitely put the excluded directories in a constant and iterate over it:
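As an alternative, since the `if to_exclude in dirs` check might be costly (for a long list of sub-directories), you could convert them to sets: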
Or, since directory names have to be unique in every directory:
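Regarding the dates, the comparisons where you first call `__str__` seem very hacky. Better use the fact that `datetime.date` objects are comparable:
You can also use `format`'s nice direct format specifiers for date objects:
Your whole `process_raw_file_names` function is actually very hard to understand at the moment. But I'm currently struggling to come up with good alternatives.
You should avoid shadowing the built-in function `join` with `os.path.join`. Since you actually use that function only once and in a short line, why not call `os.path.join` explicitly, following the Python Zen "Explicit is better than implicit":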
This
can now be better written as:
EXCLUDE = 'Marked', 'BackupNonCritical', 'Attachments', ...
def walk_file_system(working_directory):
...
for to_exclude in EXCLUDE:
if to_exclude in dirs:
dirs.remove(to_exclude)
As an alternative, since the `if to_exclude in dirs` check might be costly (for a long list of sub-directories), you could convert them to sets:
EXCLUDE = {'Marked', 'BackupNonCritical', 'Attachments', ...}
def walk_file_system(working_directory):
...
for to_exclude in EXCLUDE.intersection(dirs):
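dirs.remove(to_exclude)
Or, since directory names have to be unique in every directory (note that `os.walk` only honours in-place changes to `dirs`, so the result must be assigned back with `dirs[:] = ...` for the pruning to take effect):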
def walk_file_system(working_directory):
...
dirs = set(dirs).difference(EXCLUDE)
Regarding the dates, the comparisons where you first call `__str__` seem very hacky. Better use the fact that `datetime.date` objects are comparable:
import datetime
TODAY = datetime.date.today()
YESTERDAY = TODAY - datetime.timedelta(days=1)
file_name = "foo.txt"
mod_time = datetime.datetime.fromtimestamp(os.stat(file_name).st_mtime)
if mod_time.date() > YESTERDAY:
print "new file"
if mod_time.date() == TODAY:
print "still a new file"
You can also use `format`'s nice direct format specifiers for date objects:
file.replace('<yesterday>', '{:%m-%d-%Y}-'.format(YESTERDAY))
Your whole `process_raw_file_names` function is actually very hard to understand at the moment. But I'm currently struggling to come up with good alternatives.
You should avoid shadowing the built-in function `join` with `os.path.join`. Since you actually use that function only once and in a short line, why not do this to follow the Python Zen "Explicit is better than implicit":
import os
for name in files:
item = os.path.join(root, name)
mtime = datetime.datetime.fromtimestamp(os.stat(file_name).st_mtime)
...
This
file_list = (join(root, name) for name in files)
for item in file_list:
mtime = datetime.datetime.fromtimestamp(
int(os.stat(item).st_mtime)).strftime('%Y-%m-%d')
if mtime >= (YESTERDAY).__str__():
if ('_' + str(YESTERDAY.year)) in item:
clean_item = item[:(item.find('_' + str(YESTERDAY.year)))]
else:
clean_item = item
current_files.append(clean_item + '\n')
can now be better written as:
year_suffix = "_{:%Y}".format(YESTERDAY)
for name in files:
item = os.path.join(root, name)
mtime = datetime.datetime.fromtimestamp(os.stat(item).st_mtime)
if mtime.date() >= YESTERDAY:
index = item.find(year_suffix)
if index != -1:
item = item[:index]
current_files.append(item + '\n')
Code Snippets
EXCLUDE = 'Marked', 'BackupNonCritical', 'Attachments', ...
def walk_file_system(working_directory):
...
for to_exclude in EXCLUDE:
if to_exclude in dirs:
dirs.remove(to_exclude)
EXCLUDE = {'Marked', 'BackupNonCritical', 'Attachments', ...}
def walk_file_system(working_directory):
...
for to_exclude in EXCLUDE.intersection(dirs):
dirs.remove(to_exclude)
def walk_file_system(working_directory):
...
dirs = set(dirs).difference(EXCLUDE)
import datetime
TODAY = datetime.date.today()
YESTERDAY = TODAY - datetime.timedelta(days=1)
file_name = "foo.txt"
mod_time = datetime.datetime.fromtimestamp(os.stat(file_name).st_mtime)
if mod_time.date() > YESTERDAY:
print "new file"
if mod_time.date() == TODAY:
print "still a new file"
file.replace('<yesterday>', '{:%m-%d-%Y}-'.format(YESTERDAY))
Context
StackExchange Code Review Q#144148, answer score: 2
Revisions (0)
No revisions yet.