snippetpythonMinor
Parse HTTP header using Python and tcpflow
Viewed 0 times
headerparsehttppythonusingandtcpflow
Problem
I wrote a program that reads a pcap file and parses the HTTP traffic in the pcap to generate a dictionary that contains HTTP headers for each request and response in this pcap.
My code does the following:
I test my code with multiple test cases, but honestly I don't have a good experience in Python, so could anyone check it for me please?
```
import os
from os import listdir
from os.path import isfile, join
from StringIO import StringIO
import mimetools
def getFields(headers):
fields={}
i=1
for header in headers:
if len(header)==0:
continue
# if this line is complement for the previous line
if header.find(" ")==0 or
header.find("\t")==0:
continue
if len(header.split(":"))>=2:
key = header.split(":")[0].strip()
# if the key has multiple values such as cookie
if fields.has_key(key):
fields[key]=fields[key]+" "+header[header.find(":")+1:].strip()
else:
fields[key]=header[header.find(":")+1:].strip()
while headers[i].find(" ")==0 or
headers[i].find("\t")==0 :
fields[key]=fields[key]+" "+headers[i].strip()
i=i+1
# end of the while loop
# end of the else
else:
# else for [if len(header.split(":"))>=2: ]
print "ERROR: RFC VIOLATION"
# end of the for loop
return fields
def main():
# you have to write it in the terminal "cd /home/user/Desktop/empty-dir"
os.system("tcpflow -r /home/user/Desktop/12.pcap -v")
for f in listdir("/home/user/Desktop/empty-dir"):
if f.find("80")==19 or f.find(
My code does the following:
- Uses tcpflow to reassemble the tcp segments
- Read the files generated by tcpflow and check if it related to HTTP
- If the file contains HTTP traffic, my code will read the file and generate a corresponding dictionary that contains the HTTP header fields.
I test my code with multiple test cases, but honestly I don't have a good experience in Python, so could anyone check it for me please?
```
import os
from os import listdir
from os.path import isfile, join
from StringIO import StringIO
import mimetools
def getFields(headers):
fields={}
i=1
for header in headers:
if len(header)==0:
continue
# if this line is complement for the previous line
if header.find(" ")==0 or
header.find("\t")==0:
continue
if len(header.split(":"))>=2:
key = header.split(":")[0].strip()
# if the key has multiple values such as cookie
if fields.has_key(key):
fields[key]=fields[key]+" "+header[header.find(":")+1:].strip()
else:
fields[key]=header[header.find(":")+1:].strip()
while headers[i].find(" ")==0 or
headers[i].find("\t")==0 :
fields[key]=fields[key]+" "+headers[i].strip()
i=i+1
# end of the while loop
# end of the else
else:
# else for [if len(header.split(":"))>=2: ]
print "ERROR: RFC VIOLATION"
# end of the for loop
return fields
def main():
# you have to write it in the terminal "cd /home/user/Desktop/empty-dir"
os.system("tcpflow -r /home/user/Desktop/12.pcap -v")
for f in listdir("/home/user/Desktop/empty-dir"):
if f.find("80")==19 or f.find(
Solution
New Lines and indentations help the interpreter know where the code terminates and blocks end, you have to be super careful with them
Like in your if condition, you can't have a newline in between the conditions.
This code will error out because you can't have a new line in your condition statement.
Python is New Line Terminated. It should read like this
Same with this piece of code
It should read:
Like in your if condition, you can't have a newline in between the conditions.
if header.find(" ")==0 or
header.find("\t")==0:
continueThis code will error out because you can't have a new line in your condition statement.
Python is New Line Terminated. It should read like this
if header.find(" ")==0 or header.find("\t")==0
continueSame with this piece of code
while headers[i].find(" ")==0 or
headers[i].find("\t")==0 :
fields[key]=fields[key]+" "+headers[i].strip()
i=i+1It should read:
while headers[i].find(" ")==0 or headers[i].find("\t")==0 :
fields[key]=fields[key]+" "+headers[i].strip()
i=i+1Code Snippets
if header.find(" ")==0 or
header.find("\t")==0:
continueif header.find(" ")==0 or header.find("\t")==0
continuewhile headers[i].find(" ")==0 or
headers[i].find("\t")==0 :
fields[key]=fields[key]+" "+headers[i].strip()
i=i+1while headers[i].find(" ")==0 or headers[i].find("\t")==0 :
fields[key]=fields[key]+" "+headers[i].strip()
i=i+1Context
StackExchange Code Review Q#57715, answer score: 5
Revisions (0)
No revisions yet.