patternpythonMinor
Python Script to Search PirateBay
Viewed 0 times
scriptpiratebaypythonsearch
Problem
I've written a very basic Python 3 script to search ThePirateBay. Since the tracker doesn't have an API, I had to parse the HTML using BeautifulSoup. I'd like to get some reviews; I'm pretty sure the code is crap, so fire away. I'm also interested in projects I could read to improve my Python.
```
#!/usr/bin/env python3
from urllib.parse import quote
from urllib.request import urlopen
from bs4 import BeautifulSoup
from constants import *
import re
import webbrowser
class Torrent:
    """One search result, with its attributes filled in after construction.

    The parsing code assigns the attributes one by one on a freshly created
    instance. They start out with neutral defaults so that a partially
    populated instance can still be printed instead of raising
    AttributeError.
    """

    def __init__(self):
        # Text-like fields default to None; the two counters default to 0
        # because __str__ formats them with %d, which rejects None.
        self.category = None
        self.subcategory = None
        self.name = None
        self.link = None
        self.magnet = None
        self.user = None
        self.size = None
        self.date = None
        self.seeders = 0
        self.leeches = 0

    def __str__(self):
        """Return a one-line summary: categories, size, date, peers, name."""
        return "(%s / %s) (%s) (%s) (%d SE / %d LE) %s" % (
            self.category,
            self.subcategory,
            self.size,
            self.date,
            self.seeders,
            self.leeches,
            self.name,
        )

    def download(self):
        """Pass the magnet link to webbrowser.open with autoraise disabled."""
        webbrowser.open(self.magnet, autoraise=False)
def search(keywords, categories=(ALL,), order=SEED_DESC):
    """Build the search URL for *keywords* and return ``parse(url)``.

    keywords   -- free-text query; it is percent-encoded in full, including
                  '/', so it always occupies exactly one path segment.
    categories -- iterable of category codes, joined with commas in the URL.
    order      -- sort-order code, formatted as an integer.
    """
    # safe='' matters: quote() leaves '/' untouched by default, which would
    # let a query such as "AC/DC" add an extra segment to the URL path.
    encoded_keywords = quote(keywords, safe='')
    category_list = ','.join(str(category) for category in categories)
    url = '%s/search/%s/0/%d/%s' % (TPB_DOMAIN, encoded_keywords, order,
                                    category_list)
    return parse(url)
def parse(url):
META_REGEX_FORMAT = "Uploaded (.), Size (.), ULed by (.*)"
UNICODE_BLANK = '\xa0'
BLANK = ' '
ROWS = 'tr'
DATA = 'td'
soup = BeautifulSoup(urlopen(url).read())
torrents = []
for result in soup.find_all(ROWS)[1:]:
torrent = Torrent()
data = result.find_all(DATA)
a = data[0].find_all('a')
torrent.category = a[0].string
torrent.subcategory = a[1].string
a = data[1].find_all('a')
torrent.link = a[0]['href']
torrent.name = a[0].string
torrent.magnet = a[1]['href']
torrent.user = data[1].font.a
if torrent.user is not None:
```
#!/usr/bin/env python3
from urllib.parse import quote
from urllib.request import urlopen
from bs4 import BeautifulSoup
from constants import *
import re
import webbrowser
class Torrent:
    """One search result, with its attributes filled in after construction.

    The parsing code assigns the attributes one by one on a freshly created
    instance. They start out with neutral defaults so that a partially
    populated instance can still be printed instead of raising
    AttributeError.
    """

    def __init__(self):
        # Text-like fields default to None; the two counters default to 0
        # because __str__ formats them with %d, which rejects None.
        self.category = None
        self.subcategory = None
        self.name = None
        self.link = None
        self.magnet = None
        self.user = None
        self.size = None
        self.date = None
        self.seeders = 0
        self.leeches = 0

    def __str__(self):
        """Return a one-line summary: categories, size, date, peers, name."""
        return "(%s / %s) (%s) (%s) (%d SE / %d LE) %s" % (
            self.category,
            self.subcategory,
            self.size,
            self.date,
            self.seeders,
            self.leeches,
            self.name,
        )

    def download(self):
        """Pass the magnet link to webbrowser.open with autoraise disabled."""
        webbrowser.open(self.magnet, autoraise=False)
def search(keywords, categories=(ALL,), order=SEED_DESC):
    """Build the search URL for *keywords* and return ``parse(url)``.

    keywords   -- free-text query; it is percent-encoded in full, including
                  '/', so it always occupies exactly one path segment.
    categories -- iterable of category codes, joined with commas in the URL.
    order      -- sort-order code, formatted as an integer.
    """
    # safe='' matters: quote() leaves '/' untouched by default, which would
    # let a query such as "AC/DC" add an extra segment to the URL path.
    encoded_keywords = quote(keywords, safe='')
    category_list = ','.join(str(category) for category in categories)
    url = '%s/search/%s/0/%d/%s' % (TPB_DOMAIN, encoded_keywords, order,
                                    category_list)
    return parse(url)
def parse(url):
META_REGEX_FORMAT = "Uploaded (.), Size (.), ULed by (.*)"
UNICODE_BLANK = '\xa0'
BLANK = ' '
ROWS = 'tr'
DATA = 'td'
soup = BeautifulSoup(urlopen(url).read())
torrents = []
for result in soup.find_all(ROWS)[1:]:
torrent = Torrent()
data = result.find_all(DATA)
a = data[0].find_all('a')
torrent.category = a[0].string
torrent.subcategory = a[1].string
a = data[1].find_all('a')
torrent.link = a[0]['href']
torrent.name = a[0].string
torrent.magnet = a[1]['href']
torrent.user = data[1].font.a
if torrent.user is not None:
Solution
From reading the code, the only improvement I could think of is to compile
the regex once in `search_engine` and pass the compiled pattern to `get_query`,
as shown in the two snippets below,
so you won't have to compile your regex each time.
def search_engine():
try:
pattern = re.compile('"(.+)" \-(.*) \-(.*)')
while True:
kw, cat, order = get_query(pattern)
and this one
def get_query(pattern):
text = input("Search: ")
m = pattern.match(text)
So you won't have to compile your regex each time
Code Snippets
def search_engine():
try:
pattern = re.compile('"(.+)" \-(.*) \-(.*)')
while True:
kw, cat, order = get_query(pattern)
def get_query(pattern):
text = input("Search: ")
m = pattern.match(text)
Context
StackExchange Code Review Q#36909, answer score: 2
Revisions (0)
No revisions yet.