patternpythonMinor
Improve on python public interface data parser
Viewed 0 times
publicparserimproveinterfacepythondata
Problem
I would love some thoughts on this class that a wrote to extract user information from a logfile line.
I'm concerned about design patterns and the complexity of the code. The extract_user method feels especially awkward.
The class should be used as a public interface ala:
Now, the majority of the strings that I pass in will simply not be authentication strings. How do I make this available as some sort of response code / error-message to an end user? Right now I'm using self.error_message = yadayada. But I have a feeling this is not very pythonic.
I didn't write any documentation yet since I'm very uncertain about the general design.
Every detail is appreciated, I'm trying to get better at this.
```
import re
import logging
import os
import ConfigParser
class UserExtraction():
def __init__(self, string):
self.string = string
def _string_identification(self):
identifiers = {
'Successfully logged in' : 'login',
'logout' : 'logout',
}
regex_patterns = {
#TODO Store regexpatterns in outside file
"login" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: \[([0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}) ## (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\] (.+?) - Successfully .+? Role: (.+?),",
"logout" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: Unable to ping (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}), going to logout user (.+)"
}
for key, value in identifiers.iteritems():
if key in self.string:
self.event_type = value
self.regex = regex_patterns[value]
return True
return False
def _matching(self):
self.matches = re.search(self.regex, self.string)
if self.matches:
return True
else:
I'm concerned about design patterns and the complexity of the code. The extract_user method feels especially awkward.
The class should be used as a public interface ala:
parser = UserExtraction(s)
data = parser.extract_user()Now, the majority of the strings that I pass in will simply not be authentication strings. How do I make this available as some sort of response code / error-message to an end user? Right now I'm using self.error_message = yadayada. But I have a feeling this is not very pythonic.
I didn't write any documentation yet since I'm very uncertain about the general design.
Every detail is appreciated, I'm trying to get better at this.
```
import re
import logging
import os
import ConfigParser
class UserExtraction():
def __init__(self, string):
self.string = string
def _string_identification(self):
identifiers = {
'Successfully logged in' : 'login',
'logout' : 'logout',
}
regex_patterns = {
#TODO Store regexpatterns in outside file
"login" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: \[([0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}) ## (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\] (.+?) - Successfully .+? Role: (.+?),",
"logout" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: Unable to ping (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}), going to logout user (.+)"
}
for key, value in identifiers.iteritems():
if key in self.string:
self.event_type = value
self.regex = regex_patterns[value]
return True
return False
def _matching(self):
self.matches = re.search(self.regex, self.string)
if self.matches:
return True
else:
Solution
class UserExtraction():Either put
object as an explicit base class, or don't have the parens.def __init__(self, string):
self.string = string
def _string_identification(self):
identifiers = {
'Successfully logged in' : 'login',
'logout' : 'logout',
}
regex_patterns = {
#TODO Store regexpatterns in outside file
"login" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: \[([0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}) ## (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\] (.+?) - Successfully .+? Role: (.+?),",
"logout" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: Unable to ping (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}), going to logout user (.+)"
}I'd combine these two dictionaries into one list of tuples. I'd also probably make a global constant. Its not clear to me that putting it into an external file would help.
for key, value in identifiers.iteritems():You don't gain much using iteritems here
if key in self.string:
self.event_type = value
self.regex = regex_patterns[value]Don't pass data between your method by storing it on the object. That just obscures what is happening. Anything the caller needs to know should be returned not stored.
return True
return False
def _matching(self):
self.matches = re.search(self.regex, self.string)
if self.matches:
return True
else:
return FalseJust
return matches or return bool(matches) rather then introducing an if block to convert a bool into a bool.def _match_to_dict(self):
matches = self.matches
if self.event_type == 'login':
self.userinfo = {
'date_time' : matches.group(1),
'mac_addr' : matches.group(2),
'ip_addr' : matches.group(3),
'login' : matches.group(4),
'role' : matches.group(5)
}
elif self.event_type == 'logout':
self.userinfo = {
'date_time' : matches.group(1),
'ip_addr' : matches.group(2),
'login' : matches.group(3)
}Python's regular expressions have a named group feature. You can name the groups in the regular expression and thus simplify this code.
if self.userinfo:
return True
def extract_user(self):
if self._string_identification() == True:
if self._matching() == True:
if self._match_to_dict() == True:Don't use
== True. Just if self._string_identification():return self.userinfo
else:
self.error_message = "COULD NOT PARSE USERINFO INTO DICTIONARY"
else:
self.error_message = "COULD NOT MATCH AUTHENTICATION STRING"
else:
self.error_message = "NOT AN AUTHENTICATION STRING"This is python! Throw exceptions, don't store error messages.
if __name__ == "__main__":It's usually best to put your main code in a main function that you call from here.
ROOT_PATH = os.path.dirname(__file__)
config = ConfigParser.ConfigParser()
config.readfp(open(os.path.join(ROOT_PATH + '/settings.cfg')))
filename = config.get('defaults', 'file_location')
LOG_FILENAME = config.get('defaults', 'log_location')
LOG_FORMAT = '%(asctime)s - %(levelname)s %(message)s'
LOG_LEVEL = config.get('defaults', 'logging_level')Convention says that ALL_CAPS is for constants. These aren't constants
#TODO Change datefmt to be consistent with Apache logfile
logging.basicConfig(filename=LOG_FILENAME, format=LOG_FORMAT, datefmt='%m/%d %I:%M:%S', level=eval(LOG_LEVEL))
sample_strings = []
sample_strings.append('''Jan 2 15:32:49 cam-1/10.0.0.1 Authentication: [01:26:b2:F8:39:27 ## 172.16.197.23] Anonymous - Successfully logged in, Provider: anon, L2 MAC address: 00:26:B2:F2:39:87, Role: User, OS: Mac OS 10.6''')Considering combining these two lines into a string literal.
for s in sample_strings:Avoid single letter variable names, they are hard to follow
Processor = UserExtraction(s)
Processor.extract_user()By convetion Processor should be the name of a class. Local variables should undercase_with_underscores. Furthermore, should this even be a class? It looks to me more like the job for a function.
if hasattr(Processor, 'userinfo'):
print(Processor.userinfo)
else:
print(Processor.error_message)Here's me reworking of your code:
```
import re
import logging
import os
import ConfigParser
PATTERNS = [
('Successfully logged in', "(?P\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authenticatio
Code Snippets
class UserExtraction():def __init__(self, string):
self.string = string
def _string_identification(self):
identifiers = {
'Successfully logged in' : 'login',
'logout' : 'logout',
}
regex_patterns = {
#TODO Store regexpatterns in outside file
"login" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: \[([0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}) ## (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\] (.+?) - Successfully .+? Role: (.+?),",
"logout" : "(\w{3}.+?\d{1,2} \d{2}:\d{2}:\d{2}).+?Authentication: Unable to ping (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}), going to logout user (.+)"
}for key, value in identifiers.iteritems():if key in self.string:
self.event_type = value
self.regex = regex_patterns[value]return True
return False
def _matching(self):
self.matches = re.search(self.regex, self.string)
if self.matches:
return True
else:
return FalseContext
StackExchange Code Review Q#8878, answer score: 2
Revisions (0)
No revisions yet.