patternpythonMinor
Downloading files using Python-requests
Viewed 0 times
downloadingfilespythonusingrequests
Problem
I wrote a Python script to download files using multiple (source) IP addresses -- kindly suggest any improvements.
```
import cgi
import os
import posixpath
import Queue
import threading
import urllib
import urlparse
import random
import re
import shutil
import time
import requests
import requests_toolbelt
def get_IPs():
"""Returns all available IP addresses in a list."""
# TODO: Windows only. Other options?
out = []
for i in os.popen('ipconfig'):
i = i.strip()
if i.startswith('IP'):
out.append(i.rsplit(' ', 1)[-1])
return out
def get_info(url):
"""Returns name and size of file to be downloaded."""
try:
resp = requests.head(url, allow_redirects=True)
name = cgi.parse_header(resp.headers['content-disposition'])[1]['filename']
except KeyError:
path = urlparse.urlsplit(url).path
name = posixpath.basename(path)
name = urllib.unquote_plus(name)
size = int(resp.headers['content-length'])
return name, size
def worker(url, session, ud, part, size):
"""Downloads a part of the file specified by 'part' parameter."""
# TODO: optimal tries, timeout?
for _ in xrange(2):
try:
open('%s/%04d' % (ud, part), 'wb').write(
session.get(url, timeout=(2, 7), headers={'range': 'bytes=%s-%s' % (
partchunk, min(size, partchunk + chunk - 1))}).content)
break
except:
pass
else:
worker(url, sessions_queue.get(), ud, part, size)
sessions_queue.put(session)
def summary(name, size, elapsed):
"""Prints summary of the download after it is completed."""
print (
'--\n'
'%s download completed.\n'
'Time elapsed: %.2fs\n'
'Average download speed: %.2f MB/s\n'
'--' % (name, elapsed, size/elapsed/2**20))
def download(url):
"""Downloads the file pointed to by 'url' parameter."""
start = time.clock()
name, size = get_info(u
```
import cgi
import os
import posixpath
import Queue
import threading
import urllib
import urlparse
import random
import re
import shutil
import time
import requests
import requests_toolbelt
def get_IPs():
"""Returns all available IP addresses in a list."""
# TODO: Windows only. Other options?
out = []
for i in os.popen('ipconfig'):
i = i.strip()
if i.startswith('IP'):
out.append(i.rsplit(' ', 1)[-1])
return out
def get_info(url):
"""Returns name and size of file to be downloaded."""
try:
resp = requests.head(url, allow_redirects=True)
name = cgi.parse_header(resp.headers['content-disposition'])[1]['filename']
except KeyError:
path = urlparse.urlsplit(url).path
name = posixpath.basename(path)
name = urllib.unquote_plus(name)
size = int(resp.headers['content-length'])
return name, size
def worker(url, session, ud, part, size):
"""Downloads a part of the file specified by 'part' parameter."""
# TODO: optimal tries, timeout?
for _ in xrange(2):
try:
open('%s/%04d' % (ud, part), 'wb').write(
session.get(url, timeout=(2, 7), headers={'range': 'bytes=%s-%s' % (
partchunk, min(size, partchunk + chunk - 1))}).content)
break
except:
pass
else:
worker(url, sessions_queue.get(), ud, part, size)
sessions_queue.put(session)
def summary(name, size, elapsed):
"""Prints summary of the download after it is completed."""
print (
'--\n'
'%s download completed.\n'
'Time elapsed: %.2fs\n'
'Average download speed: %.2f MB/s\n'
'--' % (name, elapsed, size/elapsed/2**20))
def download(url):
"""Downloads the file pointed to by 'url' parameter."""
start = time.clock()
name, size = get_info(u
Solution
You could rewrite your
In
You also multiple times open files, but you should always try to use
Once you leave that indented block, the file is automatically closed. No need to even call
get_IPs function to be a list comprehension instead:return [i.rsplit(' ', 1)[-1] for i in map(str.strip, os.popen('ipconfig'))
if i.startswith('IP')]map will call strip on all of the results from 'ipconfig' and then you can iterate over that, ignoring any values that don't start with "IP".In
worker you're using a loop to retry after timeouts. But you're just using 2 arbitrarily. Use a constant here so it's clear what you're doing, and easy to change later:You also multiple times open files, but you should always try to use
with, known as the context manager. It automatically closes the file even in the event that an error is raised. It's the safest way to open a file.with open(filepath) as filename:
execute_code_with(filename)
print("Done with filename")Once you leave that indented block, the file is automatically closed. No need to even call
filename.close().Code Snippets
return [i.rsplit(' ', 1)[-1] for i in map(str.strip, os.popen('ipconfig'))
if i.startswith('IP')]with open(filepath) as filename:
execute_code_with(filename)
print("Done with filename")Context
StackExchange Code Review Q#120852, answer score: 2
Revisions (0)
No revisions yet.