scrape seller shop

Hendrik Schutter 2022-05-31 11:22:28 +02:00
parent 7add4df512
commit 2d14bf4c2a
3 changed files with 65 additions and 1 deletion


63
search_seller.py Normal file

@@ -0,0 +1,63 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2022/05/31
"""

from bs4 import BeautifulSoup
import datetime
from tinydb import TinyDB, Query
import urllib3
import sys

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    http = urllib3.PoolManager()
    r = http.request("GET", url)
    return BeautifulSoup(r.data, 'lxml')


def seller_listings(seller_name, db):
    # sort by newest listing
    # display page 1 (first)
    # EU only
    # 60 listings in one result page
    page_counter = 1
    last_result_page = False

    while not last_result_page:
        result_page_added = 0
        url = 'https://www.ebay.de/sch/m.html?_ssn=' + seller_name + '&_sop=10&_pgn=' + str(page_counter)
        print("Web Page: ", url)
        soup = make_soup(url)
        results = soup.find_all("li", class_="sresult lvresult clearfix li")
        for result in results:
            try:
                rec = {
                    'epid': result.a['href'].split("?", 1)[0],
                }
                # check if listing is already stored
                if not db.search(Query().epid == rec["epid"]):
                    result_page_added += 1
                    db.insert(rec)
            except (AttributeError, KeyError):
                pass
        # last result page reached when no new listings were added
        if result_page_added == 0:
            last_result_page = True
        page_counter += 1


if __name__ == "__main__":
    seller_db = TinyDB("seller_db.json")
    seller_listings("electro-network*com", seller_db)
    print(len(seller_db.all()))
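
As a usage note, not part of this commit: once search_seller.py has run, the stored listing URLs can be read back from seller_db.json with TinyDB. A minimal sketch, assuming the database produced above; the listing URL in the query below is a made-up example:

from tinydb import TinyDB, Query

seller_db = TinyDB("seller_db.json")

# Print every stored listing URL (the 'epid' field holds the link without its query parameters).
for record in seller_db.all():
    print(record['epid'])

# Check whether one particular listing is already known (hypothetical URL).
listing = Query()
known = seller_db.search(listing.epid == 'https://www.ebay.de/itm/1234567890')
print('already stored' if known else 'new listing')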

1
seller_db.json Normal file

File diff suppressed because one or more lines are too long