eBayCompetitorPriceCompare/compare.py

118 lines
5.1 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/05/31
Date of last modification: 2022/05/31
"""
from bs4 import BeautifulSoup
from datetime import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
from tqdm import tqdm
import scrape_listing as sl
import search_eBay as se
import export_html as exhtml
# Number of times a single listing is requested before it is given up on.
MAX_SCRAPE_ATTEMPTS = 5


def _scrape_with_retries(listing_url, attempts=MAX_SCRAPE_ATTEMPTS):
    """Scrape one listing, retrying when the scraper raises.

    Args:
        listing_url: Identifier handed to ``sl.scrape_listing`` unchanged.
        attempts: Maximum number of calls before the listing is skipped.

    Returns:
        Whatever ``sl.scrape_listing`` returned on the first call that did
        not raise, or ``None`` when there is nothing to scrape or every
        attempt failed. A fresh value is returned on every call, so a failed
        listing can never be mistaken for the previously scraped one.
    """
    if not listing_url:
        return None

    last_error = None
    for _ in range(attempts):
        try:
            return sl.scrape_listing(listing_url)
        except Exception as err:  # best effort; Ctrl-C/SystemExit still propagate
            last_error = err

    # tqdm.write keeps the message from tearing the progress bar apart.
    tqdm.write(
        "Unable to retrieve listing data from: {} ({!r})".format(
            listing_url, last_error
        ),
        file=sys.stderr,
    )
    return None


def _compare_listing(seller_db, shop_listing):
    """Find competitor offers that undercut one listing of the seller's shop.

    Args:
        seller_db: TinyDB instance holding the seller's own listings; used to
            ignore search hits that belong to the seller's shop.
        shop_listing: One database entry; its ``"epid"`` value is scraped.

    Returns:
        ``None`` when the seller listing could not be scraped or no cheaper
        competitor was found. Otherwise a dict with the keys
        ``'seller_listing'``, ``'competitor_listings'`` (cheapest first) and
        ``'max_price_delta'`` (seller price minus the cheapest competitor).
    """
    seller_listing_data = _scrape_with_retries(shop_listing.get("epid"))
    if not seller_listing_data:
        return None

    cheaper_listings = []
    # NOTE(review): the meaning of the second argument (1) is defined by
    # search_eBay.search and is not visible here; kept exactly as before.
    for competitor_listing_url in se.search(seller_listing_data["title"], 1):
        # Hits that are stored in the seller database are the seller's own.
        if seller_db.search(Query().epid == competitor_listing_url):
            continue

        competitor_listing_data = _scrape_with_retries(competitor_listing_url)
        if not competitor_listing_data:
            continue

        is_cheaper = competitor_listing_data["price"] < seller_listing_data["price"]
        # Exact "== True" comparison kept on purpose: the value type produced
        # by the scraper is not visible here, so truthiness might differ.
        is_directbuy = competitor_listing_data["directbuy"] == True  # noqa: E712
        if is_cheaper and is_directbuy:
            cheaper_listings.append({
                'title': competitor_listing_data["title"],
                'price': competitor_listing_data["price"],
                'image': competitor_listing_data["image"],
                'url': competitor_listing_url,
            })

    if not cheaper_listings:
        return None

    cheaper_listings.sort(key=lambda d: d['price'])
    return {
        'seller_listing': seller_listing_data,
        'competitor_listings': cheaper_listings,
        # The list is sorted ascending, so the first entry gives the largest gap.
        'max_price_delta': seller_listing_data["price"] - cheaper_listings[0]["price"],
    }


def _format_duration(duration):
    """Render a ``timedelta`` as ``"<h>h <m>m <s>s"`` with whole hours/minutes."""
    hours, remainder = divmod(duration.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    # divmod on a float yields floats; int() avoids output such as "0.0h 3.0m".
    return "{}h {}m {}s".format(int(hours), int(minutes), round(seconds, 2))


def main():
    """Compare every listing in ``seller_db.json`` with competitor offers.

    Exits with status -1 when the database is empty. Otherwise writes one
    comparison page per undercut listing (largest price gap first) and a
    start page with summary figures via ``export_html.exporter``.
    """
    seller_db = TinyDB("seller_db.json")
    shop_listings = seller_db.all()  # read once; reused for count and loop
    database_lenght = len(shop_listings)

    if database_lenght == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)

    comparison_results = []
    compare_start_timestamp = datetime.now()  # start time of the comparison

    # The context manager closes the progress bar even if a listing raises.
    with tqdm(total=database_lenght) as pbar:
        for shop_listing in shop_listings:
            comparison_result = _compare_listing(seller_db, shop_listing)
            if comparison_result is not None:
                comparison_results.append(comparison_result)
            pbar.update(1)

    print("\nFinished comparing! Found " + str(len(comparison_results)) + " possibly cheaper listings")

    now = datetime.now()  # end of comparison, also shown on the start page
    duration_compare = _format_duration(now - compare_start_timestamp)

    exp = exhtml.exporter("./html_out/")
    for comparison in sorted(comparison_results, key=lambda d: d['max_price_delta'], reverse=True):
        exp.export_comparison(comparison['seller_listing'], comparison['competitor_listings'])

    exp.export_startpage(str(database_lenght), len(comparison_results), duration_compare, now.strftime("%m/%d/%Y, %H:%M:%S"))


if __name__ == "__main__":
    main()