140 lines
5.8 KiB
Python
140 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2024/08/18
"""
|
|
|
|
from bs4 import BeautifulSoup
|
|
from datetime import datetime
|
|
from tinydb import TinyDB, Query
|
|
import urllib3
|
|
import sys
|
|
from tqdm import tqdm
|
|
import scrape_listing as sl
|
|
import search_eBay as se
|
|
import export_html as exhtml
|
|
|
|
# Upper bound on scrape attempts per listing before it is given up on.
MAX_SCRAPE_ATTEMPTS = 42


def scrape_with_retries(url, attempts=MAX_SCRAPE_ATTEMPTS, scrape=None):
    """Scrape a listing, retrying on failure.

    Args:
        url: Listing identifier/URL handed unchanged to the scraper.
        attempts: Maximum number of calls to the scraper.
        scrape: Callable taking ``url``; defaults to ``sl.scrape_listing``.

    Returns:
        Whatever the scraper returns on the first call that does not raise,
        or ``None`` if every attempt raised.
    """
    if scrape is None:
        scrape = sl.scrape_listing
    for _ in range(attempts):
        try:
            return scrape(url)
        except Exception:
            # Best effort: any scraper error just triggers the next attempt.
            # ``Exception`` (not a bare ``except``) keeps Ctrl+C and
            # ``SystemExit`` working while retries are in progress.
            continue
    return None


def find_cheaper_listings(seller_listing_data, seller_db):
    """Collect competitor listings that undercut one seller listing.

    Searches eBay with the seller listing's title, skips results that are
    stored in ``seller_db`` (i.e. the seller's own shop), and keeps only
    listings with the same category, with ``directbuy`` set, and with a
    strictly lower price.

    Args:
        seller_listing_data: Scraped seller listing; the keys ``title``,
            ``category`` and ``price`` are read.
        seller_db: TinyDB instance whose documents carry an ``epid`` field.

    Returns:
        List of dicts (``title``, ``price``, ``image``, ``url``) sorted by
        ascending price; empty if no cheaper competitor was found.
    """
    # NOTE(review): the meaning of the second argument (1) is defined by
    # search_eBay.search and is not visible here -- presumably a page count.
    competitor_urls = se.search(seller_listing_data["title"], 1)
    sys.stdout.flush()

    cheaper_listings = []
    for competitor_url in competitor_urls:
        if seller_db.search(Query().epid == competitor_url):
            continue  # listing belongs to the seller's own shop

        competitor = scrape_with_retries(competitor_url)
        if not competitor:
            continue  # could not be scraped
        if competitor["category"] != seller_listing_data["category"]:
            continue
        # Explicit comparison kept on purpose: arbitrary truthy values are
        # still rejected exactly as before.
        if competitor["directbuy"] != True:
            continue
        if competitor["price"] < seller_listing_data["price"]:
            cheaper_listings.append(
                {
                    "title": competitor["title"],
                    "price": competitor["price"],
                    "image": competitor["image"],
                    "url": competitor_url,
                }
            )

    return sorted(cheaper_listings, key=lambda d: d["price"])


def format_duration(duration):
    """Render a ``timedelta`` as ``"<hours>h <minutes>m <seconds>s"``.

    Hours and minutes are whole numbers; seconds are rounded to two
    decimals (e.g. ``"1h 2m 3.46s"``).
    """
    hours, remainder = divmod(duration.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    # divmod on floats yields floats; cast so the output is not "1.0h 2.0m".
    return "{}h {}m {}s".format(int(hours), int(minutes), round(seconds, 2))


def main():
    """Compare every listing of the seller shop against its competitors.

    Loads the shop listings from ``seller_db.json``, scrapes each one,
    looks for cheaper competitor listings, and exports the results as HTML
    into ``./html_out/``. Exits with status -1 if the database is empty.
    """
    seller_db = TinyDB("seller_db.json")
    shop_listings = seller_db.all()
    database_length = len(shop_listings)

    if database_length == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)

    comparison_results = []
    compare_start_timestamp = datetime.now()  # start time of the comparison

    # The context manager closes the progress bar even if a listing raises.
    with tqdm(total=database_length) as pbar:
        for shop_listing in shop_listings:
            epid = shop_listing.get("epid")
            # Fetched fresh for every listing: a failed scrape yields None,
            # so the previous listing's data is never compared twice.
            seller_listing_data = scrape_with_retries(epid) if epid else None

            if seller_listing_data:
                cheaper_listings = find_cheaper_listings(
                    seller_listing_data, seller_db
                )
                if cheaper_listings:
                    comparison_results.append(
                        {
                            "seller_listing": seller_listing_data,
                            "competitor_listings": cheaper_listings,
                            # The list is sorted ascending, so the first
                            # entry gives the largest price difference.
                            "max_price_delta": (
                                seller_listing_data["price"]
                                - cheaper_listings[0]["price"]
                            ),
                        }
                    )
            pbar.update(1)

    print(
        "\nFinished comparing! Found "
        + str(len(comparison_results))
        + " possibly cheaper listings"
    )

    now = datetime.now()  # current date and time
    duration_compare = format_duration(now - compare_start_timestamp)

    exp = exhtml.exporter("./html_out/")

    # Export the comparisons with the biggest price difference first.
    for comparison in sorted(
        comparison_results, key=lambda d: d["max_price_delta"], reverse=True
    ):
        exp.export_comparison(
            comparison["seller_listing"], comparison["competitor_listings"]
        )

    exp.export_startpage(
        str(database_length),
        len(comparison_results),
        duration_compare,
        now.strftime("%m/%d/%Y, %H:%M:%S"),
    )


if __name__ == "__main__":
    main()
|