2022-05-31 18:41:27 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation : 2022 / 05 / 31
Date of last modification : 2022 / 05 / 31
"""
from bs4 import BeautifulSoup
2022-07-20 23:21:01 +02:00
from datetime import datetime
2022-05-31 18:41:27 +02:00
from tinydb import TinyDB , Query
import urllib3
import sys
2022-07-21 10:39:17 +02:00
from tqdm import tqdm
2022-05-31 18:41:27 +02:00
import scrape_listing as sl
import search_eBay as se
2022-07-20 23:21:01 +02:00
import export_html as exhtml
2022-05-31 18:41:27 +02:00
RETRY_ATTEMPTS = 5  # how often a single listing is re-requested before giving up


def scrape_with_retries(scrape, url, attempts=RETRY_ATTEMPTS):
    """Call ``scrape(url)`` up to ``attempts`` times and return its first result.

    Args:
        scrape: Callable taking the listing URL and returning the listing data.
        url: URL (epid) of the listing to retrieve.
        attempts: Maximum number of calls before giving up.

    Returns:
        Whatever ``scrape`` returned on the first call that did not raise,
        or ``None`` when every attempt raised.
    """
    for _ in range(attempts):
        try:
            return scrape(url)
        except Exception:
            # Best effort: stay quiet so the progress bar is not disturbed.
            # Only ``Exception`` is caught so Ctrl-C still aborts the run.
            continue
    return None


def select_cheaper_listings(seller_price, competitors):
    """Return the direct-buy competitor offers that undercut ``seller_price``.

    Args:
        seller_price: Price of the seller's own listing.
        competitors: Iterable of ``(url, listing_data)`` pairs. Pairs whose
            ``listing_data`` is falsy (listing could not be scraped) are skipped.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``image`` and
        ``url``, sorted by ascending price (cheapest first).
    """
    cheaper = []
    for url, data in competitors:
        if not data:
            continue
        # Only listings that can be bought directly are considered.
        if data["price"] < seller_price and data["directbuy"] == True:  # noqa: E712
            cheaper.append({
                'title': data["title"],
                'price': data["price"],
                'image': data["image"],
                'url': url,
            })
    cheaper.sort(key=lambda entry: entry['price'])
    return cheaper


def format_duration(duration):
    """Format a ``timedelta`` as ``"<hours>h <minutes>m <seconds>s"``.

    Hours and minutes are whole numbers, seconds are rounded to two decimals.
    """
    hours, remainder = divmod(duration.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    # divmod on a float returns floats; cast so the output is not "0.0h 5.0m".
    return str(int(hours)) + "h " + str(int(minutes)) + "m " + str(round(seconds, 2)) + "s"


def main():
    """Compare every listing of the seller's shop with competitor offers.

    Reads the shop listings from ``seller_db.json``, scrapes each of them,
    searches for competitor listings by title, collects the cheaper direct-buy
    offers and exports the result as HTML to ``./html_out/``.
    Exits with status -1 when the database contains no listings.
    """
    seller_db = TinyDB("seller_db.json")
    shop_listings = seller_db.all()
    database_length = len(shop_listings)

    if database_length == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)

    # Built once so listings from the seller's own shop can be skipped cheaply.
    own_epids = {listing.get("epid") for listing in shop_listings}
    comparison_results = []

    pbar = tqdm(total=database_length)  # print progress bar
    compare_start_timestamp = datetime.now()  # set start time for comparing

    for shop_listing in shop_listings:
        epid = shop_listing.get("epid")
        # Start from a clean state for every listing so that a failed scrape
        # never falls back to the data of the previously processed listing.
        seller_listing_data = (
            scrape_with_retries(sl.scrape_listing, epid) if epid else None
        )

        if seller_listing_data:
            competitor_urls = se.search(seller_listing_data["title"], 1)
            sys.stdout.flush()

            scraped_competitors = (
                (url, scrape_with_retries(sl.scrape_listing, url))
                for url in competitor_urls
                if url not in own_epids  # ignore listings from the seller's shop
            )
            cheaper_listings = select_cheaper_listings(
                seller_listing_data["price"], scraped_competitors
            )

            if cheaper_listings:
                comparison_results.append({
                    'seller_listing': seller_listing_data,
                    'competitor_listings': cheaper_listings,
                    # The list is sorted by price, so the first entry yields
                    # the largest difference to the seller's price.
                    'max_price_delta': seller_listing_data["price"] - cheaper_listings[0]["price"],
                })

        pbar.update(1)

    pbar.close()
    print("\nFinished comparing! Found " + str(len(comparison_results)) + " possibly cheaper listings")

    now = datetime.now()  # current date and time
    duration_compare = format_duration(now - compare_start_timestamp)

    exp = exhtml.exporter("./html_out/")
    # Export the listings with the biggest savings potential first.
    for comparison in sorted(comparison_results, key=lambda d: d['max_price_delta'], reverse=True):
        exp.export_comparison(comparison['seller_listing'], comparison['competitor_listings'])

    exp.export_startpage(str(database_length), len(comparison_results), duration_compare, now.strftime("%m/%d/%Y, %H:%M:%S"))


if __name__ == "__main__":
    main()
2022-07-20 23:21:01 +02:00
2022-05-31 18:41:27 +02:00