eBayCompetitorPriceCompare/search_seller.py

85 lines
2.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/05/31
Date of last modification: 2022/05/31
"""
from bs4 import BeautifulSoup
import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
import random
import helper
# Suppress urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): presumably needed because requests are issued without
# certificate verification -- confirm this is intended.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def make_soup(url):
    """Fetch *url* with an HTTP GET and return the parsed document.

    A new ``urllib3.PoolManager`` is created on every call; its default
    headers carry the user agent returned by
    ``helper.get_random_user_agent()``. The response body is handed to
    BeautifulSoup using the ``lxml`` parser.
    """
    request_headers = {"user-agent": helper.get_random_user_agent()}
    pool = urllib3.PoolManager(num_pools=10, headers=request_headers)
    response = pool.request("GET", url)
    return BeautifulSoup(response.data, "lxml")
def seller_listings(seller_name, db):
    """Store the listing URLs of an eBay.de seller in *db*.

    Result pages are requested sorted by newest listing (``_sop=10``) and
    walked one after another. For every result item the link target of its
    first ``<a>`` element, stripped of the query string, is stored as
    ``{"epid": <url>}`` unless a record with the same ``epid`` already
    exists. Paging stops at the first page that contributes no new record.

    Args:
        seller_name: Seller identifier; inserted verbatim into the ``_ssn``
            query parameter.
        db: TinyDB-style store. Only ``search`` and ``insert`` are called.
    """
    page_counter = 1
    while True:
        url = (
            "https://www.ebay.de/sch/m.html?_ssn="
            + seller_name
            + "&_sop=10&_pgn="
            + str(page_counter)
        )
        if page_counter > 1:
            # Follow-up pages additionally carry a skip count.
            # NOTE(review): 60 looks like the number of items per result
            # page -- confirm against the site.
            url += "&_skc=" + str(60 * (page_counter - 1)) + "&rt=nc"

        soup = make_soup(url)
        results = soup.find_all(
            "li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom"
        )

        result_page_added = 0
        for result in results:
            # An item without an <a> child yields None here; indexing None
            # would raise TypeError, so skip such items explicitly.
            link = result.a
            if link is None:
                continue
            href = link.get("href")
            if href is None:
                continue

            rec = {"epid": href.split("?", 1)[0]}
            # Only store listings that are not already in the database.
            if not db.search(Query().epid == rec["epid"]):
                result_page_added += 1
                db.insert(rec)

        # A page without any new listing ends the crawl.
        if result_page_added == 0:
            break
        page_counter += 1
if __name__ == "__main__":
    # Script entry point: crawl one hard-coded seller into seller_db.json
    # and print how many listings the database holds afterwards.
    # The context manager closes the database storage even if the crawl
    # raises.
    with TinyDB("seller_db.json") as seller_db:
        seller_listings("electro-network*com", seller_db)
        print(len(seller_db))