scrape seller shop
This commit is contained in:
parent
7add4df512
commit
2d14bf4c2a
File diff suppressed because one or more lines are too long
63
search_seller.py
Normal file
63
search_seller.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||||
|
Date of creation: 2022/05/31
|
||||||
|
Date of last modification: 2022/05/31
|
||||||
|
"""
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import datetime
|
||||||
|
from tinydb import TinyDB, Query
|
||||||
|
import urllib3
|
||||||
|
import sys
|
||||||
|
|
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
def make_soup(url):
    """Request *url* via HTTP GET and return the body parsed by BeautifulSoup.

    A new ``urllib3.PoolManager`` is created for every call and the raw
    response bytes are handed to BeautifulSoup with the ``lxml`` parser.
    """
    response = urllib3.PoolManager().request("GET", url)
    markup = response.data
    return BeautifulSoup(markup, 'lxml')
|
||||||
|
|
||||||
|
def seller_listings(seller_name, db):
    """Store the listing links of an eBay seller's shop in *db*.

    The seller's result pages on ebay.de are requested one after another,
    starting with page 1 and ordered with ``_sop=10`` (sort by newest
    listing, according to the original author's note). For every result
    item the link target of its first anchor, stripped of the query string,
    is stored under the key ``'epid'`` unless a record with the same value
    already exists. Paging stops as soon as a page adds no new record.

    NOTE(review): earlier comments mentioned "EU only" and "60 listings per
    result page", but the URL built here sets neither a location filter nor
    a page size -- confirm whether those parameters are still wanted.

    Args:
        seller_name: eBay seller name; inserted into the URL as given.
        db: database object providing ``search`` and ``insert`` (a TinyDB
            instance in this script).
    """
    listing = Query()
    page_counter = 1

    while True:
        url = 'https://www.ebay.de/sch/m.html?_ssn=' + seller_name + '&_sop=10&_pgn=' + str(page_counter)
        print("Web Page: ", url)

        soup = make_soup(url)
        results = soup.find_all("li", class_="sresult lvresult clearfix li")

        result_page_added = 0
        for result in results:
            # Only the link extraction is guarded, so database errors are
            # no longer silently swallowed.
            try:
                epid = result.a['href'].split("?", 1)[0]
            except (AttributeError, KeyError, TypeError):
                # TypeError: the item has no <a> child (result.a is None).
                # KeyError: the anchor carries no href attribute.
                # Such items are not usable listings, so skip them.
                continue

            # check if listing is already stored
            if not db.search(listing.epid == epid):
                db.insert({'epid': epid})
                result_page_added += 1

        # A page that contributed nothing new ends the crawl.
        if result_page_added == 0:
            break

        page_counter += 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Open the JSON-backed store, scrape the seller's shop into it and
    # print how many records the store holds afterwards.
    database = TinyDB("seller_db.json")
    seller_listings("electro-network*com", database)
    record_total = len(database.all())
    print(record_total)
|
||||||
|
|
1
seller_db.json
Normal file
1
seller_db.json
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user