patternpythonMinor
Recursive download articles and dump to MongoDB
Viewed 0 times
dumpandmongodbrecursivearticlesdownload
Problem
This is simple code that recursively downloads articles from the public WordPress API:
import requests
from pymongo import MongoClient
# Connect to MongoDB with the driver's default settings and use the "wp"
# database.
client = MongoClient()
db = client.wp

# Request 100 posts per call.
payload = {
    'number': 100,
}
# NOTE: {site_id} is a placeholder that this script never substitutes; it must
# be replaced with a real site identifier before running.
url = 'https://public-api.wordpress.com/rest/v1.1/sites/{site_id}/posts'

# Fetch the first page. `found` is the total the API reports for the site,
# `posts` holds the documents returned by this request.
response = requests.get(url, params=payload).json()
found = response['found']
posts = response['posts']
print('found {number} posts'.format(number=found))
# insert_many() rejects an empty document list, so skip the insert when the
# site returned no posts.
if posts:
    db.posts.insert_many(posts)
def get_next_page(offset):
    """Download and store every remaining page of posts, starting at ``offset``.

    Relies on the module-level ``requests``, ``url``, ``db`` and ``found``
    names. Each request asks for 100 posts beginning at the current offset and
    the returned posts are inserted into ``db.posts``.

    The loop stops as soon as the reported total is covered by the posts
    already requested, or when a page comes back empty. Paging iteratively
    (rather than recursing once per page) keeps the call-stack depth constant
    regardless of how many posts the site has.

    Args:
        offset: Index of the first post to request.
    """
    while True:
        print('Inserted we have {left} posts left to go'.format(
            left=abs(found - offset)))
        response = requests.get(url, params={
            'number': 100,
            'offset': offset,
        }).json()
        page = response['posts']
        if not page:
            # insert_many() rejects an empty list; an empty page also means
            # there is nothing further to fetch.
            break
        db.posts.insert_many(page)
        # Advance first, then compare: continuing only while the total exceeds
        # the *next* offset avoids a trailing request that returns no posts.
        offset += 100
        if int(response['found']) <= offset:
            break
get_next_page(100)
Solution
The logic for getting 100 posts is duplicated for the case of the initial load and subsequent loads with an offset. It would be better to refactor in a way to eliminate the duplicated logic:
# Page through every post with a single loop, so the first request and all
# follow-up requests share one code path.
params = {'number': 100}
offset = 0
while True:
    # Set the offset at the top of each pass (0 selects the first page) so the
    # request always matches the current position.
    params['offset'] = offset
    response = requests.get(url, params=params).json()
    found = response['found']
    posts = response['posts']
    if not posts:
        # Nothing came back: stop instead of handing insert_many() an empty
        # list (which it rejects) or looping forever on a stale total.
        break
    db.posts.insert_many(posts)
    print('Inserted {number} posts'.format(number=len(posts)))
    offset += 100
    if offset >= found:
        break
    print('We have {left} posts left to go'.format(left=found - offset))
params['offset'] = offset
Code Snippets
# Page through every post with a single loop, so the first request and all
# follow-up requests share one code path.
params = {'number': 100}
offset = 0
while True:
    # Set the offset at the top of each pass (0 selects the first page) so the
    # request always matches the current position.
    params['offset'] = offset
    response = requests.get(url, params=params).json()
    found = response['found']
    posts = response['posts']
    if not posts:
        # Nothing came back: stop instead of handing insert_many() an empty
        # list (which it rejects) or looping forever on a stale total.
        break
    db.posts.insert_many(posts)
    print('Inserted {number} posts'.format(number=len(posts)))
    offset += 100
    if offset >= found:
        break
    print('We have {left} posts left to go'.format(left=found - offset))
params['offset'] = offset
Context
StackExchange Code Review Q#96827, answer score: 5
Revisions (0)
No revisions yet.