parse more than 4 result pages
@@ -11,33 +11,39 @@ import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
import random

import helper

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def make_soup(url):
    # Use a fresh, randomly chosen user agent for every request to make
    # blocking by eBay less likely.
    user_agent = {'user-agent': helper.get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, 'lxml')

def seller_listings(seller_name, db):
    # Result pages are sorted by newest listing (_sop=10), limited to the
    # EU, and contain 60 listings each.

    page_counter = 1
    last_result_page = False

    while not last_result_page:
        result_page_added = 0

        if page_counter == 1:
            url = 'https://www.ebay.de/sch/m.html?_ssn=' + seller_name + '&_sop=10&_pgn=' + str(page_counter)
        else:
            # Pages after the first also need the skip count (_skc, e.g.
            # _pgn=5 -> _skc=60*4=240) and &rt=nc; this is what lets the
            # scraper walk more than the first few result pages.
            url = 'https://www.ebay.de/sch/m.html?_ssn=' + seller_name + '&_sop=10&_pgn=' + str(page_counter) + '&_skc=' + str(60 * (page_counter - 1)) + '&rt=nc'

        print("Web Page: ", url)

        soup = make_soup(url)
        results = soup.find_all("li", class_="sresult lvresult clearfix li")

        for result in results:
            try:
                rec = {
@@ -47,12 +53,10 @@ def seller_listings(seller_name, db):
                if not db.search(Query().epid == rec["epid"]):
                    # Store only listings we have not seen before and count
                    # how many this result page contributed.
                    result_page_added += 1
                    db.insert(rec)

            except (AttributeError, KeyError):
                # Skip results that are missing the expected fields.
                pass

        # A page that contributed no new listings means we have reached
        # the end of the seller's results.
        if result_page_added == 0:
            last_result_page = True

        page_counter += 1
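
For context, a minimal sketch of how seller_listings could be driven, assuming the file above is importable as a module named scraper (a hypothetical name) and that a local TinyDB JSON file serves as the listing store:

    from tinydb import TinyDB

    import scraper  # hypothetical module name for the file changed above

    db = TinyDB('listings.json')  # arbitrary path for the listing store

    # Walks the seller's result pages until one page yields no listings
    # that are not already stored in the database.
    scraper.seller_listings('some-seller-name', db)

    print("stored listings:", len(db))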