#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/05/31
Date of last modification: 2022/05/31
"""

from bs4 import BeautifulSoup
import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
import random

import helper

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    """Fetch a URL with a random user agent and return the parsed HTML."""
    user_agent = {'user-agent': helper.get_random_user_agent()}
    #print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, 'lxml')


def seller_listings(seller_name, db):
    """Scrape all listings of a seller (sorted by newest) and store new epids in db."""
    page_counter = 1
    last_result_page = False
    while not last_result_page:
        result_page_added = 0
        # _sop=10 sorts by newest listing, _pgn is the result page number,
        # _skc skips the results of the previous pages (60 results per page)
        if page_counter == 1:
            url = 'https://www.ebay.de/sch/m.html?_ssn=' + seller_name + '&_sop=10&_pgn=' + str(page_counter)
        else:
            url = 'https://www.ebay.de/sch/m.html?_ssn=' + seller_name + '&_sop=10&_pgn=' + str(page_counter) \
                  + '&_skc=' + str(60 * (page_counter - 1)) + '&rt=nc'
        #print("Web Page: ", url)
        soup = make_soup(url)
        results = soup.find_all("li", class_="sresult lvresult clearfix li")
        for result in results:
            try:
                rec = {
                    'epid': result.a['href'].split("?", 1)[0],
                }
                # check if listing is already stored
                if not db.search(Query().epid == rec["epid"]):
                    result_page_added += 1
                    db.insert(rec)
            except (AttributeError, KeyError):
                # skip results without a usable link
                pass
        # stop once a result page yields no new listings
        if result_page_added == 0:
            last_result_page = True
        page_counter += 1


if __name__ == "__main__":
    seller_db = TinyDB("seller_db.json")
    seller_listings("electro-network*com", seller_db)
    print(len(seller_db.all()))
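
# ---------------------------------------------------------------------------
# The local "helper" module imported above is not included in this file. The
# only call used here is helper.get_random_user_agent(), which is assumed to
# return a random browser user-agent string. A minimal stand-in sketch (the
# actual module in the repository may differ) could look like this:
#
#   # helper.py
#   import random
#
#   USER_AGENTS = [
#       "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
#       "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0",
#   ]
#
#   def get_random_user_agent():
#       """Return a randomly chosen user-agent string."""
#       return random.choice(USER_AGENTS)
# ---------------------------------------------------------------------------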