patternpythonMinor
FASTA file processing using Python to invoke external filters
Viewed 0 times
fastafilepythonusingexternalfiltersinvokeprocessing
Problem
I am very new to programming and this is my first functional code. It works fine, but I'm sure that it could use a lot of optimization. If you see any blunders or would be able to help condense the script, that would be fantastic.
```
#!/usr/bin/python
import sys, getopt, subprocess, os, tempfile, shutil, time
file_name = sys.argv[2]
pwd = os.getcwd() + "/"
dirname = pwd + "Secretome_files"
file_location = dirname + '/'
try:
os.makedirs(dirname)
except OSError:
if os.path.exists(dirname):
pass
else:
raise
def singleline():
print "\nMaking fasta single line"
file_in = sys.argv[1]
file_out = open(file_location + file_name + "singleline.fasta", "w")
command = ("fasta_formatter -i " + file_in + " -w 0")
p1 = subprocess.Popen((command), stdout=file_out, shell=True)
p1.wait()
print "Fasta now single line"
def signalp():
singleline()
command = ("signalp -f short -m " + file_location + file_name + "removed_SigPep.fasta " + file_location + file_name + "singleline.fasta > "+ file_location + file_name + "signalpOUT.txt")
print "\nRunning SignalP"
signalpRUN = subprocess.Popen([command], shell=True)
signalpRUN.wait()
print "SignalP Complete"
print "\nCreating SignalP protein list"
command2 = ("fasta_formatter -i " + file_location + file_name + "removed_SigPep.fasta -t")
file_out2 = open(file_location + file_name + "removed_SigPep_tab.fasta.txt", "w")
tab = subprocess.Popen([command2], stdout=file_out2, shell=True)
tab.wait()
command3 = ("cut -f1,1 " + file_location + file_name + "removed_SigPep_tab.fasta.txt")
file_out3 = open(file_location + file_name + "listaftercut.txt", "w")
file_out4 = open(file_location + file_name + "goodlistSigP.txt", "w")
listGood = subprocess.Popen([command3], stdout=file_out3, shell=True)
listGood.wait()
openfile = open(file_location + file_name + "listaftercut.txt", 'r')
for line in
```
#!/usr/bin/python
import sys, getopt, subprocess, os, tempfile, shutil, time
file_name = sys.argv[2]
pwd = os.getcwd() + "/"
dirname = pwd + "Secretome_files"
file_location = dirname + '/'
try:
os.makedirs(dirname)
except OSError:
if os.path.exists(dirname):
pass
else:
raise
def singleline():
print "\nMaking fasta single line"
file_in = sys.argv[1]
file_out = open(file_location + file_name + "singleline.fasta", "w")
command = ("fasta_formatter -i " + file_in + " -w 0")
p1 = subprocess.Popen((command), stdout=file_out, shell=True)
p1.wait()
print "Fasta now single line"
def signalp():
singleline()
command = ("signalp -f short -m " + file_location + file_name + "removed_SigPep.fasta " + file_location + file_name + "singleline.fasta > "+ file_location + file_name + "signalpOUT.txt")
print "\nRunning SignalP"
signalpRUN = subprocess.Popen([command], shell=True)
signalpRUN.wait()
print "SignalP Complete"
print "\nCreating SignalP protein list"
command2 = ("fasta_formatter -i " + file_location + file_name + "removed_SigPep.fasta -t")
file_out2 = open(file_location + file_name + "removed_SigPep_tab.fasta.txt", "w")
tab = subprocess.Popen([command2], stdout=file_out2, shell=True)
tab.wait()
command3 = ("cut -f1,1 " + file_location + file_name + "removed_SigPep_tab.fasta.txt")
file_out3 = open(file_location + file_name + "listaftercut.txt", "w")
file_out4 = open(file_location + file_name + "goodlistSigP.txt", "w")
listGood = subprocess.Popen([command3], stdout=file_out3, shell=True)
listGood.wait()
openfile = open(file_location + file_name + "listaftercut.txt", 'r')
for line in
Solution
#!/usr/bin/python
import sys, getopt, subprocess, os, tempfile, shutil, time
file_name = sys.argv[2]
pwd = os.getcwd() + "/"
dirname = pwd + "Secretome_files"
file_location = dirname + '/'
Use the function os.path.join to create paths rather than adding the / yourself.
try:
os.makedirs(dirname)
except OSError:
if os.path.exists(dirname):
pass
else:
raise
Instead, do something like this:
except OSError as error:
if error.errno != errno.ENOENT:
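raise
This way you check the error code, which can already tell you whether it failed because the file already existed. (Note: the errno that signals an already-existing path is errno.EEXIST; errno.ENOENT means "no such file or directory", so EEXIST is the constant to compare against here, and the errno module must be imported.)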
def singleline():
print "\nMaking fasta single line"
file_in = sys.argv[1]
I recommend passing command-line arguments in as parameters rather than fetching them here.
file_out = open(file_location + file_name + "singleline.fasta", "w")
You should really close this file after you are done with it.
command = ("fasta_formatter -i " + file_in + " -w 0")
Those parentheses do nothing.
p1 = subprocess.Popen((command), stdout=file_out, shell=True)
The parentheses around command do nothing here either.
p1.wait()
You could use the function subprocess.check_call, which takes care of the wait for you; it will also raise an exception if the program exits with an error, which might be useful.
print "Fasta now single line"
def signalp():
singleline()
Stylistically, I'd have the function singleline return the location of its output file, and then use that here.
command = ("signalp -f short -m " + file_location + file_name + "removed_SigPep.fasta " + file_location + file_name + "singleline.fasta > "+ file_location + file_name + "signalpOUT.txt")
Adding strings isn't very efficient. For subprocess, it makes the most sense to create a list of parameters and pass that to the Popen constructor.
print "\nRunning SignalP"
signalpRUN = subprocess.Popen([command], shell=True)
Why do you use the > syntax here when you passed a file object earlier? I'd recommend being consistent.
signalpRUN.wait()
print "SignalP Complete"
print "\nCreating SignalP protein list"
command2 = ("fasta_formatter -i " + file_location + file_name + "removed_SigPep.fasta -t")
file_out2 = open(file_location + file_name + "removed_SigPep_tab.fasta.txt", "w")
tab = subprocess.Popen([command2], stdout=file_out2, shell=True)
tab.wait()
You're doing this basic idea several times. Write a function that takes the command line and the output file and does the execution.
command3 = ("cut -f1,1 " + file_location + file_name + "removed_SigPep_tab.fasta.txt")
file_out3 = open(file_location + file_name + "listaftercut.txt", "w")
file_out4 = open(file_location + file_name + "goodlistSigP.txt", "w")
Wait to open this until you are using it.
listGood = subprocess.Popen([command3], stdout=file_out3, shell=True)
listGood.wait()
openfile = open(file_location + file_name + "listaftercut.txt", 'r')
I suggest using the with open() as file: syntax to make sure files get closed.
for line in openfile:
goodname = line.partition(' ')[0] + '\n'
file_out4.write(goodname)
The output for this file would be better in its own function.
def sigpFasta():
command4 = ("faSomeRecords " + file_location + file_name + "singleline.fasta " + file_location + file_name + "goodlistSigP.txt " + file_location + file_name + "signalP_pass.fasta")
print "\nRetreving SignalP fasta"
fastaRUN = subprocess.Popen([command4], shell=True)
fastaRUN.wait()
def tmhmm():
command = ("tmhmm " + file_location + file_name + "removed_SigPep.fasta")
file_out = open(file_location + file_name + "tmhmmOUT.txt", "w")
print "\nRunning tmhmm on mature signalp sequences only"
tmhmmRUN = subprocess.Popen([command], stdout=file_out, shell=True)
tmhmmRUN.wait()
Another place to use that function. Also, rather than all the print chatter, I suggest having it print the commands it's executing.
print "tmhmm complete"
print "\nIdentifying sequences without tm regions."
openfile = open(file_location + file_name + "tmhmmOUT.txt", "r")
file_out2 = open(file_location + file_name + "tmhmmGoodlist.txt", "a")
for line in openfile:
if "\tPredHel=0\t" in line:
goodname = line.partition('\t')[0] + '\n'
file_out2.write(goodname)
Rather than reopening and appending to Goodlist a bunch of times, I suggest one function that opens each of the files you pull from and writes them. That way you only have to open the Goodlist once.
```
def targetp():
command = ("targetp -N " + file_location + file_name + "signalP_pass.fasta")
file_out = open(file_location + file_name + "targetpOUT.txt", "w")
print "\nRunning TargetP on SignalP pass seqeunces only"
targetpRUN = subprocess.Popen([command], stdout=file_out, shell=True)
ta
Code Snippets
#!/usr/bin/python
import sys, getopt, subprocess, os, tempfile, shutil, time
file_name = sys.argv[2]
pwd = os.getcwd() + "/"
dirname = pwd + "Secretome_files"
file_location = dirname + '/'

try:
os.makedirs(dirname)
except OSError:
if os.path.exists(dirname):
pass
else:
raise

except OSError as error:
if error.errno != errno.ENOENT:
raise

def singleline():
print "\nMaking fasta single line"
file_in = sys.argv[1]

file_out = open(file_location + file_name + "singleline.fasta", "w")

Context
StackExchange Code Review Q#19695, answer score: 6
Revisions (0)
No revisions yet.