#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
    Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2022/05/31
"""
from bs4 import BeautifulSoup
from tinydb import TinyDB, Query
import urllib3

import helper

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    # Fetch the page with a random user agent to make trivial bot detection harder.
    user_agent = {'user-agent': helper.get_random_user_agent()}
    #print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, 'lxml')


def search(search_term, max_pages):
    # Query filters encoded in the URL:
    #   - Buy It Now only, no auctions (LH_BIN=1)
    #   - EU sellers only (LH_PrefLoc=3)
    #   - 60 listings per result page
    #   - paginated via _pgn
    found_listings = set()
    page_counter = 1
    last_result_page = False

    while not last_result_page:
        result_page_added = 0
        url = ('https://www.ebay.de/sch/i.html?_from=R40&_nkw='
               + search_term.replace(" ", "+")
               + '&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn='
               + str(page_counter))
        # Alternative query: sorted by newest, Germany only, commercial sellers:
        #url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sop=10&LH_PrefLoc=1&LH_SellerType=2&LH_BIN=1&_pgn=' + str(page_counter)
        #print("Web Page: ", url)
        soup = make_soup(url)
        results = soup.find_all("div", class_="s-item__info clearfix")
        for result in results:
            try:
                # Strip the query string so each listing URL is a stable key.
                listing_url = result.a['href'].split("?", 1)[0]
                if listing_url not in found_listings:
                    found_listings.add(listing_url)
                    result_page_added += 1
                # Persisting with TinyDB instead (see store_new_listings below):
                #rec = {'epid': listing_url}
                # Check if the listing is already stored:
                #if not db.search(Query().epid == rec["epid"]):
                #    result_page_added += 1
                #    db.insert(rec)
            except (AttributeError, KeyError):
                # Skip result stubs without a link.
                pass
        # Stop once a page yields nothing new or the page limit is reached.
        if (result_page_added == 0) or (page_counter == max_pages):
            last_result_page = True
        page_counter += 1
    return found_listings


if __name__ == "__main__":
    print(search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages=4))
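
# --- Optional persistence ----------------------------------------------------
# A minimal sketch of the TinyDB persistence hinted at by the commented-out
# lines in search(). The database path "db.json", the helper name
# store_new_listings, and the "epid" field are assumptions, not part of the
# original script.
def store_new_listings(listings, db_path="db.json"):
    """Insert listing URLs that are not yet stored; return how many were added."""
    db = TinyDB(db_path)
    added = 0
    for listing_url in listings:
        # Deduplicate against previously stored records before inserting.
        if not db.search(Query().epid == listing_url):
            db.insert({'epid': listing_url})
            added += 1
    return added

# Possible usage:
#   listings = search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages=4)
#   print(store_new_listings(listings), "new listings stored")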