From ebf379a71653101ade96dd42fc21b8ec6ce0ceba Mon Sep 17 00:00:00 2001
From: localhorst
Date: Sun, 18 Aug 2024 22:49:48 +0200
Subject: [PATCH] only compare items in the same category

---
 compare.py        | 135 ++++++++++++++++++++++++++--------------------
 export_html.py    | 108 ++++++++++++++++++++++---------------
 helper.py         |  22 ++++----
 scrape_listing.py |  61 ++++++++++++++-------
 search_eBay.py    |  63 ++++++++++++----------
 search_seller.py  |  44 +++++++++------
 6 files changed, 259 insertions(+), 174 deletions(-)

diff --git a/compare.py b/compare.py
index c3db57b..d664714 100644
--- a/compare.py
+++ b/compare.py
@@ -1,9 +1,8 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
     Date of creation: 2022/05/31
-    Date of last modification: 2022/05/31
+    Date of last modification: 2024/08/18
 """
 
 from bs4 import BeautifulSoup
@@ -18,101 +17,123 @@ import export_html as exhtml
 
 if __name__ == "__main__":
     seller_db = TinyDB("seller_db.json")
-    database_lenght = len(seller_db.all())
+    database_length = len(seller_db.all())
     comparison_results = list()
     seller_listing_data = False
 
-    if database_lenght == 0:
+    if database_length == 0:
         print("Unable to load seller shop database!")
         sys.exit(-1)
-    #print("Loaded seller shop database: " + str(database_lenght) + " listings")
+    # print("Loaded seller shop database: " + str(database_length) + " listings")
 
-    pbar = tqdm(total=database_lenght) #print progress ba
-    compare_start_timestamp = datetime.now() #set start time for comparing
+    pbar = tqdm(total=database_length)  # print progress bar
+    compare_start_timestamp = datetime.now()  # set start time for comparing
 
     for shop_listing_url in seller_db.all():
-        #print(shop_listing_url["epid"])
-        for retrieve_counter in range(5):
+        # print(shop_listing_url["epid"])
+        for retrieve_counter in range(5):
             try:
                 seller_listing_data = sl.scrape_listing(shop_listing_url["epid"])
                 break
             except:
                 pass
-                #print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])
+                # print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])
 
         if seller_listing_data:
             #print("\n\nCompare: " + seller_listing_data["title"] + " | " + str(seller_listing_data["price"]) + "€ | " + shop_listing_url["epid"])
             comparison_result = {
-                'seller_listing': seller_listing_data,
-                'competitor_listings': list(),
-                'max_price_delta': float(0.0)
+                "seller_listing": seller_listing_data,
+                "competitor_listings": list(),
+                "max_price_delta": float(0.0),
             }
-            competitor_listings = se.search(seller_listing_data["title"], 1)
-            #print("Found " + str(len(competitor_listings)) + " listings from competitors with term: " + seller_listing_data["title"])
+            competitor_listings = se.search(seller_listing_data["title"], 1)
+            # print("Found " + str(len(competitor_listings)) + " listings from competitors with term: " + seller_listing_data["title"])
             sys.stdout.flush()
             cheaper_listings = list()
 
             for competitor_listing_url in competitor_listings:
-                #print(competitor_listing_url)
-                #print(shop_listing_url["epid"])
+                # print(competitor_listing_url)
+                # print(shop_listing_url["epid"])
                 if seller_db.search(Query().epid == competitor_listing_url):
-                    #print("Found listing from sellers shop --> ignore " + competitor_listing_url)
+                    # print("Found listing from sellers shop --> ignore " + competitor_listing_url)
                     continue
-                #else:
-                    #print("not from own shop")
+                # else:
+                # print("not from own shop")
 
-                for retrieve_counter_competitor in range(5):
+                competitor_listing_data = False
+                for retrieve_counter_competitor in range(5):
                     try:
-                        #print("scraping : " + competitor_listing_url)
-                        competitor_listing_data = sl.scrape_listing(competitor_listing_url)
+                        # print("scraping : " + competitor_listing_url)
+                        competitor_listing_data = sl.scrape_listing(
+                            competitor_listing_url
+                        )
                         break
                     except:
                         pass
-                        #print("Unable to retrieve competitor listing data from: " + competitor_listing_url)
+                        # print("Unable to retrieve competitor listing data from: " + competitor_listing_url)
 
                 if competitor_listing_data:
-                    #print(competitor_listing_data["price"])
-                    if (competitor_listing_data["price"] < seller_listing_data["price"]) and (competitor_listing_data["directbuy"] == True):
-                        #print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + competitor_listing_url + " Type: "+ str(competitor_listing_data["directbuy"]))
-                        cheaper_listings.append({
-                            'title': competitor_listing_data["title"],
-                            'price': competitor_listing_data["price"],
-                            'image': competitor_listing_data["image"],
-                            'url': competitor_listing_url})
+                    # print(competitor_listing_data["price"])
 
-            for cheaper_listing in sorted(cheaper_listings, key=lambda d: d['price']) :
-                #print(cheaper_listing)
-                #print("found cheaper competitor: " + str(cheaper_listing["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + cheaper_listing["url"])
-                comparison_result['competitor_listings'].append(cheaper_listing)
-                if comparison_result['max_price_delta'] == 0.0:
-                    comparison_result['max_price_delta'] = seller_listing_data["price"] - cheaper_listing["price"]
+                    if competitor_listing_data["category"] != seller_listing_data["category"]:
+                        # print("Found competitor_listing is not in same category!")
+                        continue
+
+                    if not competitor_listing_data["directbuy"]:
+                        # print("Found competitor_listing is not direct buy!")
+                        continue
+
+                    if competitor_listing_data["price"] < seller_listing_data["price"]:
+                        # print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + competitor_listing_url + " Type: " + str(competitor_listing_data["directbuy"]))
+                        cheaper_listings.append(
+                            {
+                                "title": competitor_listing_data["title"],
+                                "price": competitor_listing_data["price"],
+                                "image": competitor_listing_data["image"],
+                                "url": competitor_listing_url,
+                            }
+                        )
+
+            for cheaper_listing in sorted(cheaper_listings, key=lambda d: d["price"]):
+                # print(cheaper_listing)
+                # print("found cheaper competitor: " + str(cheaper_listing["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + cheaper_listing["url"])
+                comparison_result["competitor_listings"].append(cheaper_listing)
+                if comparison_result["max_price_delta"] == 0.0:
+                    comparison_result["max_price_delta"] = (
+                        seller_listing_data["price"] - cheaper_listing["price"]
+                    )
 
             if cheaper_listings:
                 comparison_results.append(comparison_result)
         pbar.update(1)
-        #break
+        # break
     pbar.close()
 
-    print("\nFinished comparing! Found " + str(len(comparison_results)) + " possibly cheaper listings")
+    print(
+        "\nFinished comparing! Found "
+        + str(len(comparison_results))
+        + " possibly cheaper listings"
+    )
 
-    now = datetime.now() # current date and time
+    now = datetime.now()  # current date and time
 
     duration_compare = datetime.now() - compare_start_timestamp
     hours, remainder = divmod(duration_compare.total_seconds(), 3600)
     minutes, seconds = divmod(remainder, 60)
-    duration_compare = str(hours) +"h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
+    duration_compare = (
+        str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
+    )
 
     exp = exhtml.exporter("./html_out/")
 
-    for comparison in sorted(comparison_results, key=lambda d: d['max_price_delta'], reverse=True):
-        exp.export_comparison(comparison['seller_listing'], comparison['competitor_listings'])
+    for comparison in sorted(
+        comparison_results, key=lambda d: d["max_price_delta"], reverse=True
+    ):
+        exp.export_comparison(
+            comparison["seller_listing"], comparison["competitor_listings"]
+        )
 
-    exp.export_startpage(str(database_lenght), len(comparison_results), duration_compare, now.strftime("%m/%d/%Y, %H:%M:%S"))
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
+    exp.export_startpage(
+        str(database_length),
+        len(comparison_results),
+        duration_compare,
+        now.strftime("%m/%d/%Y, %H:%M:%S"),
+    )
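
Note: the new filter only counts a competitor as comparable when the full breadcrumb list
returned by scrape_listing() matches the seller's exactly; a listing in a parent or sibling
category is skipped even if it is cheaper. A minimal sketch of that logic (breadcrumb values
are invented for illustration):

    seller = {"price": 42.0, "category": ["Computer", "Netzteile"], "directbuy": True}
    competitor = {"price": 39.0, "category": ["Computer", "Ladegeräte"], "directbuy": True}

    if competitor["category"] != seller["category"]:
        print("skipped: different breadcrumb path, although cheaper")  # fires here
    elif competitor["directbuy"] and competitor["price"] < seller["price"]:
        print("cheaper competitor in same category")
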
Found " + + str(len(comparison_results)) + + " possibly cheaper listings" + ) - now = datetime.now() # current date and time + now = datetime.now() # current date and time - duration_compare = datetime.now() - compare_start_timestamp + duration_compare = datetime.now() - compare_start_timestamp hours, remainder = divmod(duration_compare.total_seconds(), 3600) minutes, seconds = divmod(remainder, 60) - duration_compare = str(hours) +"h " + str(minutes) + "m " + str(round(seconds, 2)) + "s" + duration_compare = ( + str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s" + ) exp = exhtml.exporter("./html_out/") - for comparison in sorted(comparison_results, key=lambda d: d['max_price_delta'], reverse=True): - exp.export_comparison(comparison['seller_listing'], comparison['competitor_listings']) + for comparison in sorted( + comparison_results, key=lambda d: d["max_price_delta"], reverse=True + ): + exp.export_comparison( + comparison["seller_listing"], comparison["competitor_listings"] + ) - exp.export_startpage(str(database_lenght), len(comparison_results), duration_compare, now.strftime("%m/%d/%Y, %H:%M:%S")) - - - - - - - - - - \ No newline at end of file + exp.export_startpage( + str(database_length), + len(comparison_results), + duration_compare, + now.strftime("%m/%d/%Y, %H:%M:%S"), + ) diff --git a/export_html.py b/export_html.py index b226c85..58ac253 100644 --- a/export_html.py +++ b/export_html.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Author: Hendrik Schutter, mail@hendrikschutter.com @@ -11,41 +10,52 @@ import os import template_html as thtml import shutil + class exporter: - export_dir="" + export_dir = "" tsStart = 0 counter = 0 + def __init__(self, path): self.export_dir = path - self.tsStart = datetime.now() #set start time for exporting + self.tsStart = datetime.now() # set start time for exporting try: os.mkdir(self.export_dir) except FileExistsError: pass try: - os.mkdir(os.path.join(self.export_dir,"compare/")) + os.mkdir(os.path.join(self.export_dir, "compare/")) except FileExistsError: pass self.copy_static_export() - + def copy_static_export(self): try: - os.mkdir(os.path.join(self.export_dir,"css/")) + os.mkdir(os.path.join(self.export_dir, "css/")) except FileExistsError: pass - try: - os.mkdir(os.path.join(self.export_dir,"data/")) + try: + os.mkdir(os.path.join(self.export_dir, "data/")) except FileExistsError: pass - shutil.copy("./html/css/w3.css", os.path.join(self.export_dir,"css/","w3.css")) - shutil.copy("./html/data/favicon.ico", os.path.join(self.export_dir,"data/","favicon.ico")) - shutil.copy("./html/data/icon.png", os.path.join(self.export_dir,"data/","icon.png")) + shutil.copy( + "./html/css/w3.css", os.path.join(self.export_dir, "css/", "w3.css") + ) + shutil.copy( + "./html/data/favicon.ico", + os.path.join(self.export_dir, "data/", "favicon.ico"), + ) + shutil.copy( + "./html/data/icon.png", os.path.join(self.export_dir, "data/", "icon.png") + ) def export_comparison(self, seller_listing, competitor_listings): - self.counter +=1 - f = open(os.path.join(self.export_dir, "compare/", str(self.counter) + ".html"), "a") + self.counter += 1 + f = open( + os.path.join(self.export_dir, "compare/", str(self.counter) + ".html"), "a" + ) f.write(thtml.html_comparison_head()) f.write("") @@ -54,58 +64,72 @@ class exporter: f.write(thtml.html_comparison_competitor_list_header()) competitor_listing_counter = 0 for competitor_listing in competitor_listings: - competitor_listing_counter +=1 - 
diff --git a/helper.py b/helper.py
index 98a2f15..89edfc9 100644
--- a/helper.py
+++ b/helper.py
@@ -1,4 +1,3 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -7,15 +6,16 @@
 """
 
 import random
 
+
 def get_random_user_agent():
     uastrings = [
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",\
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",\
-        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",\
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",\
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",\
-        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",\
-        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0"\
-        ]
-
-    return random.choice(uastrings)+str(random.randrange(255))
\ No newline at end of file
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
+        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
+        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
+        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
+    ]
+
+    return random.choice(uastrings) + str(random.randrange(255))
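
Note: get_random_user_agent() appends a random integer to the chosen UA string, presumably
to vary the client fingerprint between requests. All three scrapers consume it the same way;
a minimal usage sketch:

    import urllib3

    import helper

    http = urllib3.PoolManager(10, headers={"user-agent": helper.get_random_user_agent()})
    r = http.request("GET", "https://www.ebay.de")
    print(r.status)
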
diff --git a/scrape_listing.py b/scrape_listing.py
index 2390f23..0d77a39 100644
--- a/scrape_listing.py
+++ b/scrape_listing.py
@@ -1,9 +1,8 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
     Date of creation: 2022/05/31
-    Date of last modification: 2022/05/31
+    Date of last modification: 2024/08/18
 """
 
 from bs4 import BeautifulSoup
@@ -15,34 +14,56 @@ import helper
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
+
 def make_soup(url):
-    user_agent = {'user-agent': helper.get_random_user_agent()}
-    #print(user_agent)
+    user_agent = {"user-agent": helper.get_random_user_agent()}
+    # print(user_agent)
     http = urllib3.PoolManager(10, headers=user_agent)
     r = http.request("GET", url)
-    return BeautifulSoup(r.data,'lxml')
+    return BeautifulSoup(r.data, "lxml")
+
 
 def scrape_listing(url):
-    #print ("Web Page: ", url)
+    # print ("Web Page: ", url)
     soup = make_soup(url)
-    #print(soup)
-    #print(soup.find("div", class_="vim x-item-title").span.text)
-    #print(soup.find("span", class_="ux-call-to-action__text").text)
-    #print(float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')))
-    #print(soup.find("img", loading="eager")["src"])
+    # print(soup)
+    # print(soup.find("div", class_="vim x-item-title").span.text)
+    # print(soup.find("span", class_="ux-call-to-action__text").text)
+    # print(float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')))
+    # print(soup.find("img", loading="eager")["src"])
+
+    # print(soup.find("nav", class_="breadcrumbs breadcrumb--overflow").find("li"))
+
+    category = list()
+    for span_subcategory in soup.find(
+        "nav", class_="breadcrumbs breadcrumb--overflow"
+    ).find_all("span"):
+        category.append(span_subcategory.text)
 
     listing = {
-        'title': soup.find("div", class_="vim x-item-title").span.text,
-        'directbuy' : True if soup.find("span", class_="ux-call-to-action__text").text == "Sofort-Kaufen" else False,
-        'price': float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')),
-        'image': soup.find("img", loading="eager")["src"],
-        'url' : url
+        "title": soup.find("div", class_="vim x-item-title").span.text,
+        "directbuy": (
+            True
+            if soup.find("span", class_="ux-call-to-action__text").text
+            == "Sofort-Kaufen"
+            else False
+        ),
+        "price": float(
+            soup.find("div", class_="x-price-primary")
+            .find("span", class_="ux-textspans")
+            .text.replace("EUR", "")
+            .strip()
+            .replace(",", ".")
+        ),
+        "category": category,
+        "image": soup.find("img", loading="eager")["src"],
+        "url": url,
     }
 
     return listing
 
-if __name__ == "__main__":
-    #while(1):
-    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
-    print(listing)
+
+if __name__ == "__main__":
+    # while(1):
+    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
+    print(listing)
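
Note: the new "category" field collects the text of every <span> inside the breadcrumb <nav>,
so categories compare as ordered paths rather than single strings. A self-contained sketch of
the extraction (the markup below is a simplified stand-in for eBay's real breadcrumb HTML):

    from bs4 import BeautifulSoup

    html = (
        '<nav class="breadcrumbs breadcrumb--overflow">'
        "<ul>"
        "<li><a><span>Computer</span></a></li>"
        "<li><a><span>Netzteile</span></a></li>"
        "</ul>"
        "</nav>"
    )
    soup = BeautifulSoup(html, "lxml")
    category = [
        span.text
        for span in soup.find(
            "nav", class_="breadcrumbs breadcrumb--overflow"
        ).find_all("span")
    ]
    print(category)  # ['Computer', 'Netzteile']
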
soup.find("span", class_="ux-call-to-action__text").text == "Sofort-Kaufen" else False, - 'price': float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')), - 'image': soup.find("img", loading="eager")["src"], - 'url' : url + "title": soup.find("div", class_="vim x-item-title").span.text, + "directbuy": ( + True + if soup.find("span", class_="ux-call-to-action__text").text + == "Sofort-Kaufen" + else False + ), + "price": float( + soup.find("div", class_="x-price-primary") + .find("span", class_="ux-textspans") + .text.replace("EUR", "") + .strip() + .replace(",", ".") + ), + "category": category, + "image": soup.find("img", loading="eager")["src"], + "url": url, } return listing -if __name__ == "__main__": - #while(1): - listing = scrape_listing("https://www.ebay.de/itm/226288543773") - print(listing) +if __name__ == "__main__": + # while(1): + listing = scrape_listing("https://www.ebay.de/itm/226288543773") + print(listing) diff --git a/search_eBay.py b/search_eBay.py index 0203c70..7cc57dc 100644 --- a/search_eBay.py +++ b/search_eBay.py @@ -1,9 +1,8 @@ - #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Author: Hendrik Schutter, mail@hendrikschutter.com Date of creation: 2022/05/31 - Date of last modification: 2022/05/31 + Date of last modification: 2024/08/18 """ from bs4 import BeautifulSoup @@ -15,57 +14,63 @@ import helper urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + def make_soup(url): - user_agent = {'user-agent': helper.get_random_user_agent()} - #print(user_agent) + user_agent = {"user-agent": helper.get_random_user_agent()} + # print(user_agent) http = urllib3.PoolManager(10, headers=user_agent) r = http.request("GET", url) - return BeautifulSoup(r.data,'lxml') + return BeautifulSoup(r.data, "lxml") + def search(search_term, max_pages): - #sort by newest listing - #display page 1 (first) - #EU only - #60 listings in one result page - #No auction - + # sort by newest listing + # display page 1 (first) + # EU only + # 60 listings in one result page + # No auction + found_listings = set() page_counter = 1 last_result_page = False - + while not last_result_page: result_page_added = 0 - url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn=' + str(page_counter) - #url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sop=10&LH_PrefLoc=1&LH_SellerType=2&LH_BIN=1&_pgn=' + str(page_counter) + url = ( + "https://www.ebay.de/sch/i.html?_from=R40&_nkw=" + + search_term.replace(" ", "+") + + "&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn=" + + str(page_counter) + ) + # url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sop=10&LH_PrefLoc=1&LH_SellerType=2&LH_BIN=1&_pgn=' + str(page_counter) - #print ("Web Page: ", url) + # print ("Web Page: ", url) soup = make_soup(url) results = soup.find_all("div", class_="s-item__info clearfix") for result in results: try: - #rec = { - # 'epid': result.div.div.div.a['href'].split("?", 1)[0], - #} - #print(result) - found_listings.add(result.a['href'].split("?", 1)[0]) + # rec = { + # 'epid': result.div.div.div.a['href'].split("?", 1)[0], + # } + # print(result) + found_listings.add(result.a["href"].split("?", 1)[0]) - #check if listing is allready stored - #if not db.search(Query().epid == rec["epid"]): - # result_page_added += 1 - # db.insert(rec) + # check if listing 
diff --git a/search_seller.py b/search_seller.py
index 92aa4dc..acefbdf 100644
--- a/search_seller.py
+++ b/search_seller.py
@@ -1,4 +1,3 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -19,46 +18,62 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
 def make_soup(url):
-    user_agent = {'user-agent': helper.get_random_user_agent()}
-    #print(user_agent)
+    user_agent = {"user-agent": helper.get_random_user_agent()}
+    # print(user_agent)
     http = urllib3.PoolManager(10, headers=user_agent)
     r = http.request("GET", url)
-    return BeautifulSoup(r.data,'lxml')
+    return BeautifulSoup(r.data, "lxml")
+
 
 def seller_listings(seller_name, db):
-    #sort by newest listing
+    # sort by newest listing
     page_counter = 1
     last_result_page = False
 
     while not last_result_page:
         result_page_added = 0
         if page_counter == 1:
-            url = 'https://www.ebay.de/sch/m.html?_ssn='+ seller_name + '&_sop=10&_pgn='+ str(page_counter)
+            url = (
+                "https://www.ebay.de/sch/m.html?_ssn="
+                + seller_name
+                + "&_sop=10&_pgn="
+                + str(page_counter)
+            )
         else:
-            url = 'https://www.ebay.de/sch/m.html?_ssn='+ seller_name + '&_sop=10&_pgn='+ str(page_counter) + '&_skc=' + str(60*(page_counter-1)) +'&rt=nc'
+            url = (
+                "https://www.ebay.de/sch/m.html?_ssn="
+                + seller_name
+                + "&_sop=10&_pgn="
+                + str(page_counter)
+                + "&_skc="
+                + str(60 * (page_counter - 1))
+                + "&rt=nc"
+            )
 
-        #print ("Web Page: ", url)
+        # print ("Web Page: ", url)
         soup = make_soup(url)
 
-        results = soup.find_all("li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom")
+        results = soup.find_all(
+            "li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom"
+        )
 
         for result in results:
-            #print(result)
+            # print(result)
             try:
                 rec = {
-                    'epid': result.a['href'].split("?", 1)[0],
+                    "epid": result.a["href"].split("?", 1)[0],
                 }
-                #check if listing is allready stored
+                # check if listing is already stored
                 if not db.search(Query().epid == rec["epid"]):
                     result_page_added += 1
                     db.insert(rec)
             except (AttributeError, KeyError) as ex:
                 pass
 
-        if (result_page_added == 0):
+        if result_page_added == 0:
             last_result_page = True
         page_counter += 1
 
@@ -67,4 +82,3 @@ if __name__ == "__main__":
     seller_db = TinyDB("seller_db.json")
     seller_listings("electro-network*com", seller_db)
     print(len(seller_db.all()))
-
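
Note: seller pagination advances with both _pgn and a 60-item _skc offset from the second page
on. A quick sketch of the URLs that seller_listings() requests (the seller name is the dummy
from the __main__ block):

    def seller_page_url(seller_name, page_counter):
        # mirrors the URL construction in seller_listings()
        url = (
            "https://www.ebay.de/sch/m.html?_ssn="
            + seller_name
            + "&_sop=10&_pgn="
            + str(page_counter)
        )
        if page_counter > 1:
            url += "&_skc=" + str(60 * (page_counter - 1)) + "&rt=nc"
        return url

    for page in range(1, 4):
        print(seller_page_url("electro-network*com", page))
    # ..._pgn=1, then ..._pgn=2&_skc=60&rt=nc, then ..._pgn=3&_skc=120&rt=nc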