patternpythonMinor
Analyse an Nginx access log with awk and produce a report
Viewed 0 times
producelogwithawknginxreportanalyseandaccess
Problem
I'm looking for your opinions on anything that I am doing wrong in the application below, such as best practices, glaringly horrible errors or even just your own personal opinion.
Task: Grab an Nginx access log, ask it some questions and dump responses out to a templated report.
I have removed all comments from this post to keep the reading to a minimum. You can see the full version at GitHub.
```
#!/usr/bin/python
import datetime
from os import remove
from subprocess import Popen
from subprocess import PIPE
from subprocess import check_output
from string import Template
class NginxReporter():
location = None
template = 'templates/default.html'
html = None
def __init__(self, location=None, template=None, execute=False):
self.location = location
if template:
self.template = template
if execute:
self.parse()
self.output()
self.delete()
def scpdownload(self, remotelocation=None):
if remotelocation:
check_output(['scp', remotelocation, self.location])
def delete(self):
remove(self.location)
def parse(self):
html = {}
codes = Popen("awk '{print $9}' " + self.location +
" | sort | uniq -c | sort -r", shell=True, stdout=PIPE)
html['codes'] = self.build(codes)
statuses = [206, 301, 302, 400, 404, 408, 499, 500]
for status in statuses:
statuslist = Popen("awk '($9 ~ /" + str(status) + "/)' "
+ self.location + " | awk '{print $7}' " +
" | sort | uniq -c | sort -r",
shell=True, stdout=PIPE)
html["http_"+str(status)+"_code"] = self.build(statuslist)
html['generated'] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
self.html = html
def build(self, output):
block = ""
while True:
out = output.stdout.rea
Task: Grab an Nginx access log, ask it some questions and dump responses out to a templated report.
I have removed all comments from this post to keep the reading to a minimum. You can see the full version at GitHub.
```
#!/usr/bin/python
import datetime
from os import remove
from subprocess import Popen
from subprocess import PIPE
from subprocess import check_output
from string import Template
class NginxReporter():
location = None
template = 'templates/default.html'
html = None
def __init__(self, location=None, template=None, execute=False):
self.location = location
if template:
self.template = template
if execute:
self.parse()
self.output()
self.delete()
def scpdownload(self, remotelocation=None):
if remotelocation:
check_output(['scp', remotelocation, self.location])
def delete(self):
remove(self.location)
def parse(self):
html = {}
codes = Popen("awk '{print $9}' " + self.location +
" | sort | uniq -c | sort -r", shell=True, stdout=PIPE)
html['codes'] = self.build(codes)
statuses = [206, 301, 302, 400, 404, 408, 499, 500]
for status in statuses:
statuslist = Popen("awk '($9 ~ /" + str(status) + "/)' "
+ self.location + " | awk '{print $7}' " +
" | sort | uniq -c | sort -r",
shell=True, stdout=PIPE)
html["http_"+str(status)+"_code"] = self.build(statuslist)
html['generated'] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
self.html = html
def build(self, output):
block = ""
while True:
out = output.stdout.rea
Solution
The biggest problem, and the hardest to fix, is calling
You are not using the class attributes here:
So you could just delete these from your class.
The constructor is not well designed. You're trying to do too much, for two very different use cases:
It's better to decide one of these ways, stick with it, and simplify the code accordingly.
This method is strange:
You allow calling it without params, or null param, to do nothing? This is messy and confusing. It would be better to require a param.
Btw, downloading a file with
This could use some reworking:
Like this:
You open a file for reading but you're not closing it:
The right way to do is using
I also renamed
If you want to temporarily comment out one line,
it's better to use
awk, sort | uniq -c | sort -r, sometimes within a loop, when you could implement all that more efficiently in Python. It's ugly to do this in multiple sub-shells and multiple processes. But that would take a substantial rewrite.You are not using the class attributes here:
class NginxReporter():
location = None
template = 'templates/default.html'
html = NoneSo you could just delete these from your class.
The constructor is not well designed. You're trying to do too much, for two very different use cases:
- In one use case, you use it just to set a logfile location, and then you call other methods to do the real work
- In another use case, the constructor would call the other methods
It's better to decide one of these ways, stick with it, and simplify the code accordingly.
This method is strange:
def scpdownload(self, remotelocation=None):
if remotelocation:
check_output(['scp', remotelocation, self.location])You allow calling it without params, or null param, to do nothing? This is messy and confusing. It would be better to require a param.
Btw, downloading a file with
scp doesn't sound like it belongs to the class. It violates the single responsibility principle: a class should have one clear purpose (parse nginx log files), and nothing else.This could use some reworking:
if out == '' and output.poll() != None:
break
if out != '':
block = outLike this:
if not out:
if not output.poll():
break
else:
block = outYou open a file for reading but you're not closing it:
file = open(self.template)
block = Template(file.read())
print block.substitute(self.html)The right way to do is using
with:with open(self.template) as fh:
block = Template(fh.read())
print block.substitute(self.html)I also renamed
file to fh because file is a class in Python 2.xIf you want to temporarily comment out one line,
it's better to use
# instead of """ ... """ which is recommended for multi-line comments and docstrings:report = NginxReporter('access.log')
#report.scpdownload("sshuser@server.com:/path/to/nginx/access.log")
report.parse()
report.output()Code Snippets
class NginxReporter():
location = None
template = 'templates/default.html'
html = Nonedef scpdownload(self, remotelocation=None):
if remotelocation:
check_output(['scp', remotelocation, self.location])if out == '' and output.poll() != None:
break
if out != '':
block = outif not out:
if not output.poll():
break
else:
block = outfile = open(self.template)
block = Template(file.read())
print block.substitute(self.html)Context
StackExchange Code Review Q#63501, answer score: 3
Revisions (0)
No revisions yet.