#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/05/31
Date of last modification: 2022/05/31
"""

from bs4 import BeautifulSoup
from tinydb import TinyDB, Query
import urllib3

import helper

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    # Fetch the page with a randomized user agent to reduce the chance of being blocked.
    user_agent = {"user-agent": helper.get_random_user_agent()}
    # print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, "lxml")


def seller_listings(seller_name, db):
    # Walk the seller's result pages (sorted by newest listing) and store
    # every listing that is not yet in the database.
    page_counter = 1
    last_result_page = False

    while not last_result_page:
        result_page_added = 0
        if page_counter == 1:
            url = (
                "https://www.ebay.de/sch/m.html?_ssn="
                + seller_name
                + "&_sop=10&_pgn="
                + str(page_counter)
            )
        else:
            url = (
                "https://www.ebay.de/sch/m.html?_ssn="
                + seller_name
                + "&_sop=10&_pgn="
                + str(page_counter)
                + "&_skc="
                + str(60 * (page_counter - 1))
                + "&rt=nc"
            )
        # print("Web Page: ", url)
        soup = make_soup(url)
        results = soup.find_all(
            "li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom"
        )

        for result in results:
            # print(result)
            try:
                rec = {
                    "epid": result.a["href"].split("?", 1)[0],
                }
                # check if listing is already stored
                if not db.search(Query().epid == rec["epid"]):
                    result_page_added += 1
                    db.insert(rec)
            except (AttributeError, KeyError):
                pass

        # Stop once a result page yields no new listings.
        if result_page_added == 0:
            last_result_page = True
        page_counter += 1


if __name__ == "__main__":
    seller_db = TinyDB("seller_db.json")
    seller_listings("electro-network*com", seller_db)
    print(len(seller_db.all()))
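
# A minimal usage sketch (not part of the original script): after a run has
# populated seller_db.json, the stored listing URLs can be read back with
# TinyDB's all(). The file name matches the one used in __main__ above.
#
#   from tinydb import TinyDB
#   db = TinyDB("seller_db.json")
#   for entry in db.all():
#       print(entry["epid"])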