#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author:                     Hendrik Schutter, mail@hendrikschutter.com
    Date of creation:           2022/05/31
    Date of last modification:  2022/05/31

    Compares every listing stored in the seller shop database against the
    listings found by an eBay search for the same title and exports the
    cheaper competitor offers as HTML pages.
"""

import sys
from datetime import datetime

import urllib3
from bs4 import BeautifulSoup
from tinydb import TinyDB, Query
from tqdm import tqdm

import scrape_listing as sl
import search_eBay as se
import export_html as exhtml

# Number of attempts made to scrape a single listing before it is skipped.
SCRAPE_ATTEMPTS = 5


def _scrape_with_retries(listing_id, attempts=SCRAPE_ATTEMPTS):
    """Scrape one listing, retrying when the scraper raises.

    Args:
        listing_id: Identifier handed unchanged to ``sl.scrape_listing``.
        attempts: Maximum number of scrape attempts.

    Returns:
        The value returned by the first attempt that does not raise, or
        ``None`` when every attempt raised. Returning a fresh ``None`` on
        failure prevents data of a previously scraped listing from being
        reused for the current one.
    """
    last_error = None
    for _ in range(attempts):
        try:
            return sl.scrape_listing(listing_id)
        except Exception as error:
            # Best effort: scraping may fail transiently, so try again.
            # ``Exception`` (not a bare ``except``) keeps Ctrl-C and
            # ``sys.exit`` working while retries are running.
            last_error = error

    # Report the skipped listing without breaking the progress bar output.
    tqdm.write(
        "Unable to retrieve listing data from: " + str(listing_id)
        + " (" + repr(last_error) + ")",
        file=sys.stderr,
    )
    return None


def _find_cheaper_listings(seller_db, seller_listing_data):
    """Collect competitor offers that undercut one seller listing.

    Args:
        seller_db: TinyDB instance holding the seller's own listings; used
            to ignore search hits that belong to the seller's shop.
        seller_listing_data: Scraped data of the seller listing; its
            ``"title"`` is used as search term and its ``"price"`` as the
            price to beat.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``image`` and
        ``url``, sorted by ascending price. Empty when nothing is cheaper.
    """
    # NOTE(review): the meaning of the second argument (1) is defined by
    # search_eBay.search and is not visible here - confirm before changing.
    competitor_listings = se.search(seller_listing_data["title"], 1)
    sys.stdout.flush()

    cheaper_listings = []
    for competitor_listing_url in competitor_listings:
        # Search hits that are part of the seller's own shop are no competitors.
        if seller_db.search(Query().epid == competitor_listing_url):
            continue

        competitor_listing_data = _scrape_with_retries(competitor_listing_url)
        if not competitor_listing_data:
            continue

        is_cheaper = competitor_listing_data["price"] < seller_listing_data["price"]
        # The equality test is kept as in the original because the type that
        # scrape_listing stores under "directbuy" is not visible here.
        is_direct_buy = competitor_listing_data["directbuy"] == True  # noqa: E712
        if is_cheaper and is_direct_buy:
            cheaper_listings.append({
                'title': competitor_listing_data["title"],
                'price': competitor_listing_data["price"],
                'image': competitor_listing_data["image"],
                'url': competitor_listing_url,
            })

    return sorted(cheaper_listings, key=lambda d: d['price'])


def _format_duration(duration):
    """Render a ``timedelta`` as ``"<hours>h <minutes>m <seconds>s"``.

    Hours and minutes are whole numbers; seconds are rounded to two decimals.
    """
    hours, remainder = divmod(duration.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    # divmod on a float yields floats; cast so the text reads "1h 2m", not
    # "1.0h 2.0m".
    return str(int(hours)) + "h " + str(int(minutes)) + "m " + str(round(seconds, 2)) + "s"


def main():
    """Compare all seller listings with competitor offers and export HTML.

    Exits with status -1 when the seller shop database contains no listings.
    """
    seller_db = TinyDB("seller_db.json")
    shop_listings = seller_db.all()  # read the database once
    database_length = len(shop_listings)

    if database_length == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)

    comparison_results = []
    compare_start_timestamp = datetime.now()

    # The context manager closes the progress bar even if an error aborts
    # the comparison.
    with tqdm(total=database_length) as pbar:
        for shop_listing in shop_listings:
            seller_listing_data = _scrape_with_retries(shop_listing["epid"])
            if seller_listing_data:
                cheaper_listings = _find_cheaper_listings(seller_db, seller_listing_data)
                if cheaper_listings:
                    comparison_results.append({
                        'seller_listing': seller_listing_data,
                        'competitor_listings': cheaper_listings,
                        # The list is sorted ascending, so the first entry is
                        # the cheapest offer and yields the largest delta.
                        'max_price_delta': seller_listing_data["price"] - cheaper_listings[0]["price"],
                    })
            # One tick per shop listing, whether or not it could be scraped,
            # so the bar always reaches its total.
            pbar.update(1)

    print("\nFinished comparing! Found " + str(len(comparison_results)) + " possibly cheaper listings")

    # A single clock reading serves as report timestamp and as end of the run.
    now = datetime.now()
    duration_compare = _format_duration(now - compare_start_timestamp)

    exp = exhtml.exporter("./html_out/")

    # Export the listings with the largest price gap first.
    for comparison in sorted(comparison_results, key=lambda d: d['max_price_delta'], reverse=True):
        exp.export_comparison(comparison['seller_listing'], comparison['competitor_listings'])

    exp.export_startpage(str(database_length), len(comparison_results), duration_compare, now.strftime("%m/%d/%Y, %H:%M:%S"))


if __name__ == "__main__":
    main()