#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author:                     Hendrik Schutter, mail@hendrikschutter.com
    Date of creation:           2022/05/31
    Date of last modification:  2024/08/18
"""

from bs4 import BeautifulSoup
from datetime import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
from tqdm import tqdm

import scrape_listing as sl
import search_eBay as se
import export_html as exhtml

# How often a single listing is re-requested before it is given up on.
RETRIEVE_ATTEMPTS = 5


def _scrape_with_retries(listing_url, attempts=RETRIEVE_ATTEMPTS):
    """Scrape one listing, retrying when the scraper raises.

    Args:
        listing_url: Identifier handed unchanged to ``sl.scrape_listing``.
        attempts: Maximum number of calls made before giving up.

    Returns:
        Whatever ``sl.scrape_listing`` returned on the first call that did
        not raise, or ``None`` when every attempt raised.
    """
    for _ in range(attempts):
        try:
            return sl.scrape_listing(listing_url)
        except Exception:
            # Best effort: a failed attempt is simply retried. Only
            # ``Exception`` is caught so that Ctrl-C (KeyboardInterrupt) and
            # SystemExit still stop the script.
            continue
    return None


def _find_cheaper_listings(seller_listing_data, own_epids):
    """Collect competitor listings that undercut one seller listing.

    Searches with the seller listing's title, skips results that belong to
    the seller's own shop, that cannot be scraped, that are in a different
    category or that are not direct buy, and keeps those with a lower price.

    Args:
        seller_listing_data: Scraped data of the seller listing; the keys
            "title", "category" and "price" are read.
        own_epids: Set of "epid" values stored in the seller database.

    Returns:
        List of dicts with the keys "title", "price", "image" and "url",
        sorted by ascending price (cheapest first).
    """
    competitor_listings = se.search(seller_listing_data["title"], 1)
    sys.stdout.flush()

    cheaper_listings = []
    for competitor_listing_url in competitor_listings:
        print(competitor_listing_url)

        # Results that are listings of the seller's own shop are ignored.
        if competitor_listing_url in own_epids:
            continue

        competitor_listing_data = _scrape_with_retries(competitor_listing_url)
        if not competitor_listing_data:
            continue

        if competitor_listing_data["category"] != seller_listing_data["category"]:
            continue

        # Kept as an equality test on purpose so that exactly the values that
        # compare equal to True pass, as before.
        if competitor_listing_data["directbuy"] != True:  # noqa: E712
            continue

        if competitor_listing_data["price"] < seller_listing_data["price"]:
            cheaper_listings.append(
                {
                    "title": competitor_listing_data["title"],
                    "price": competitor_listing_data["price"],
                    "image": competitor_listing_data["image"],
                    "url": competitor_listing_url,
                }
            )

    return sorted(cheaper_listings, key=lambda d: d["price"])


def _format_duration(duration):
    """Format a ``timedelta`` as e.g. ``"1h 2m 3.45s"``.

    Hours and minutes are converted to ``int`` because ``divmod`` on the
    float returned by ``total_seconds()`` yields floats, which would
    otherwise be rendered as ``"1.0h 2.0m"``.
    """
    hours, remainder = divmod(duration.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{}h {}m {}s".format(int(hours), int(minutes), round(seconds, 2))


def main():
    """Compare every listing of the seller database against competitors.

    Loads ``seller_db.json``, scrapes each stored listing, searches for
    cheaper competitor listings and exports the results as HTML into
    ``./html_out/``. Exits with status -1 when the database is empty.
    """
    seller_db = TinyDB("seller_db.json")
    seller_listings = seller_db.all()
    database_length = len(seller_listings)

    if database_length == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)

    # Built once so the own-shop check is a set lookup instead of a database
    # query for every competitor URL.
    own_epids = {entry["epid"] for entry in seller_listings if "epid" in entry}

    comparison_results = []
    compare_start_timestamp = datetime.now()  # set start time for comparing

    # Iterating through tqdm advances the progress bar once per seller
    # listing and closes it when the loop ends.
    for shop_listing in tqdm(seller_listings):
        epid = shop_listing.get("epid")

        # The scrape result is looked up fresh for every listing, so a listing
        # that cannot be retrieved is skipped instead of being compared with
        # the data of the previously scraped listing.
        seller_listing_data = _scrape_with_retries(epid) if epid else None
        if not seller_listing_data:
            continue

        cheaper_listings = _find_cheaper_listings(seller_listing_data, own_epids)
        if not cheaper_listings:
            continue

        comparison_results.append(
            {
                "seller_listing": seller_listing_data,
                "competitor_listings": cheaper_listings,
                # The list is sorted by ascending price, so the first entry
                # is the cheapest and yields the largest delta.
                "max_price_delta": (
                    seller_listing_data["price"] - cheaper_listings[0]["price"]
                ),
            }
        )

    print(
        "\nFinished comparing! Found "
        + str(len(comparison_results))
        + " possibly cheaper listings"
    )

    now = datetime.now()  # current date and time
    duration_compare = _format_duration(now - compare_start_timestamp)

    exp = exhtml.exporter("./html_out/")

    # Export the listings with the largest price difference first.
    for comparison in sorted(
        comparison_results, key=lambda d: d["max_price_delta"], reverse=True
    ):
        exp.export_comparison(
            comparison["seller_listing"], comparison["competitor_listings"]
        )

    exp.export_startpage(
        str(database_length),
        len(comparison_results),
        duration_compare,
        now.strftime("%m/%d/%Y, %H:%M:%S"),
    )


if __name__ == "__main__":
    main()