diff --git a/compare.py b/compare.py
new file mode 100644
index 0000000..12837a0
--- /dev/null
+++ b/compare.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Author: Hendrik Schutter, mail@hendrikschutter.com
+    Date of creation: 2022/05/31
+    Date of last modification: 2022/05/31
+"""
+
+import sys
+
+from tinydb import TinyDB
+
+import scrape_listing as sl
+import search_eBay as se
+
+
+if __name__ == "__main__":
+    seller_db = TinyDB("seller_db.json")
+    database_length = len(seller_db.all())
+    if database_length == 0:
+        print("Unable to load seller shop database!")
+        sys.exit(-1)
+    print("Loaded seller shop database: " + str(database_length) + " listings")
+
+    for shop_listing_url in seller_db.all():
+        #scraping can fail on network or parse errors, so retry a few times
+        shop_listing_data = None
+        for retrieve_counter in range(5):
+            try:
+                shop_listing_data = sl.scrape_listing(shop_listing_url["epid"])
+                break
+            except Exception:
+                print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])
+
+        if shop_listing_data:
+            print("\n\nCompare: " + shop_listing_data["title"] + " | " + str(shop_listing_data["price"]) + "€ | " + shop_listing_url["epid"])
+
+            competitor_listings = se.search(shop_listing_data["title"], 1)
+            print("Found " + str(len(competitor_listings)) + " listings from competitors with term: " + shop_listing_data["title"])
+            sys.stdout.flush()
+
+            for competitor_listing_url in competitor_listings:
+                if competitor_listing_url == shop_listing_url["epid"]:
+                    #listing from the seller's own shop --> skip it
+                    continue
+
+                competitor_listing_data = None
+                for retrieve_counter_competitor in range(5):
+                    try:
+                        competitor_listing_data = sl.scrape_listing(competitor_listing_url)
+                        break
+                    except Exception:
+                        pass
+
+                if competitor_listing_data:
+                    if competitor_listing_data["price"] < shop_listing_data["price"]:
+                        print("Found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead of " + str(shop_listing_data["price"]) + "€ ---> " + competitor_listing_url)
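
The two five-attempt retry loops in compare.py share the same shape, so they could be factored into one small helper. Below is a minimal sketch of that pattern; retry(), attempts, and delay_s are hypothetical names, not part of this change, and the back-off delay is an assumption:

    import time

    def retry(func, attempts=5, delay_s=1.0):
        """Call func() until it succeeds; return None if every attempt fails."""
        for _ in range(attempts):
            try:
                return func()
            except Exception:
                time.sleep(delay_s)  # brief pause before the next attempt
        return None

    # usage, mirroring the loop in compare.py:
    # shop_listing_data = retry(lambda: sl.scrape_listing(shop_listing_url["epid"]))
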
diff --git a/scrape_listing.py b/scrape_listing.py
index c704afe..6ad9e09 100644
--- a/scrape_listing.py
+++ b/scrape_listing.py
@@ -23,7 +23,7 @@ def make_soup(url):
     return BeautifulSoup(r.data,'lxml')
 
 def scrape_listing(url):
-    print ("Web Page: ", url)
+    #print ("Web Page: ", url)
 
     soup = make_soup(url)
 
@@ -38,6 +38,6 @@ def scrape_listing(url):
     return listing
 
 if __name__ == "__main__":
-    listing = scrape_listing("https://www.ebay.de/itm/165445016341")
+    listing = scrape_listing("https://www.ebay.de/itm/165508291809")
     print(listing)
 
diff --git a/search_listing.py b/search_eBay.py
similarity index 75%
rename from search_listing.py
rename to search_eBay.py
index 3f0d042..0edac54 100644
--- a/search_listing.py
+++ b/search_eBay.py
@@ -22,13 +22,15 @@ def make_soup(url):
     r = http.request("GET", url)
     return BeautifulSoup(r.data,'lxml')
 
-def search_listing(search_term, db, max_pages):
+def search(search_term, max_pages):
     #sort by newest listing
     #display page 1 (first)
     #EU only
     #60 listings in one result page
     #No auction
 
+    found_listings = set()
+
     page_counter = 1
     last_result_page = False
 
@@ -43,21 +45,20 @@
 
         for result in results:
             try:
-                rec = {
-                    'epid': result.div.div.div.a['href'].split("?", 1)[0],
-                }
-                #check if listing is allready stored
-                if not db.search(Query().epid == rec["epid"]):
-                    result_page_added += 1
-                    db.insert(rec)
+                listing_url = result.div.div.div.a['href'].split("?", 1)[0]
+                #count listings that are new to the result set so the
+                #pagination check below still detects an empty result page
+                if listing_url not in found_listings:
+                    found_listings.add(listing_url)
+                    result_page_added += 1
             except (AttributeError, KeyError) as ex:
                 pass
 
         if (result_page_added == 0) or (page_counter == max_pages):
             last_result_page = True
         page_counter += 1
 
+    return found_listings
+
 if __name__ == "__main__":
-    search_db = TinyDB("search_db.json")
-    search_listing("mainboard power pc", search_db, max_pages = 4)
-    print(len(search_db.all()))
+    print(search("mainboard power pc", max_pages = 4))
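
With the rename to search_eBay.py, search() now returns a set of listing URLs instead of writing them to a TinyDB file, so callers can feed its results straight into scrape_listing(). A minimal usage sketch, assuming both modules are on the import path and eBay's markup still matches the scraper's selectors:

    import scrape_listing as sl
    import search_eBay as se

    for url in se.search("mainboard power pc", max_pages=1):
        try:
            listing = sl.scrape_listing(url)
            print(listing["title"], listing["price"])
        except Exception:
            pass  # occasional network or parse failures are expected

Returning a set also deduplicates URLs across result pages for free, which the old per-record db.search() lookup had to do by hand.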