basic compare

2022-05-31 18:41:27 +02:00
parent b9d8774916
commit 50f1edd449
3 changed files with 88 additions and 12 deletions
--- a/compare.py
+++ b/compare.py
@ -0,0 +1,70 @@
+
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Author:                     Hendrik Schutter, mail@hendrikschutter.com
+    Date of creation:           2022/05/31
+    Date of last modification:  2022/05/31
+"""
+
+from bs4 import BeautifulSoup
+import datetime
+from tinydb import TinyDB, Query
+import urllib3
+import sys
+import scrape_listing as sl
+import search_eBay as se
+
+
+if __name__ == "__main__":
+    seller_db = TinyDB("seller_db.json")
+    database_lenght = len(seller_db.all())
+    if database_lenght == 0:
+        print("Unable to load seller shop database!")
+        sys.exit(-1)
+    print("Loaded seller shop database: " + str(database_lenght) + " listings")
+
+    for shop_listing_url in seller_db.all():
+        #print(shop_listing_url["epid"])
+
+        for retrieve_counter in range(5): 
+            try:
+                shop_listing_data = sl.scrape_listing(shop_listing_url["epid"])
+                break
+            except:
+                #pass
+                print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])
+            
+        if shop_listing_data:
+            print("\n\nCompare: " + shop_listing_data["title"] + " | " + str(shop_listing_data["price"]) + "€ | " + shop_listing_url["epid"])
+
+            competitor_listings = se.search(shop_listing_data["title"], 1)  
+            print("Found " + str(len(competitor_listings)) + " listings from competitors with term: " + shop_listing_data["title"])  
+            sys.stdout.flush()
+            for competitor_listing_url in competitor_listings:
+                #print(competitor_listing_url)
+                #print(shop_listing_url["epid"])
+
+                if competitor_listing_url == shop_listing_url["epid"]:
+                    #print("Found listing from sellers shop --> ignore")
+                    break
+                #else:
+                    #print("not from own shop")
+
+                for retrieve_counter_competitor in range(5): 
+                    try:
+                        #print("scraping : " + competitor_listing_url)
+                        competitor_listing_data = sl.scrape_listing(competitor_listing_url)
+                        break
+                    except:
+                        pass
+                        #print("Unable to retrieve competitor listing data from: " + competitor_listing_url)
+
+                if competitor_listing_data:
+                    #print(competitor_listing_data["price"])
+                    if competitor_listing_data["price"] < shop_listing_data["price"]:
+                        print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(shop_listing_data["price"]) + "€ ---> " +  competitor_listing_url)
+                        
+
+        #break
+        
+        
--- a/scrape_listing.py
+++ b/scrape_listing.py
@ -23,7 +23,7 @@ def make_soup(url):
    return BeautifulSoup(r.data,'lxml')

 def scrape_listing(url):
-    print ("Web Page: ", url)
+    #print ("Web Page: ", url)

    soup = make_soup(url)

@ -38,6 +38,6 @@ def scrape_listing(url):
    return listing

 if __name__ == "__main__":
-   listing =  scrape_listing("https://www.ebay.de/itm/165445016341")
+   listing =  scrape_listing("https://www.ebay.de/itm/165508291809")
   print(listing)

--- a/search_listing.py
+++ b/search_listing.py
@ -22,13 +22,15 @@ def make_soup(url):
    r = http.request("GET", url)
    return BeautifulSoup(r.data,'lxml')

-def search_listing(search_term, db, max_pages):
+def search(search_term, max_pages):
    #sort by newest listing
    #display page 1 (first)
    #EU only
    #60 listings in one result page
    #No auction
    
+    found_listings = set()
+
    page_counter = 1
    last_result_page = False
    
@ -43,22 +45,26 @@ def search_listing(search_term, db, max_pages):

        for result in results:
            try:
-                rec = {
-                    'epid': result.div.div.div.a['href'].split("?", 1)[0],
-                }
+                #rec = {
+                 #   'epid': result.div.div.div.a['href'].split("?", 1)[0],
+                #}
+
+                found_listings.add(result.div.div.div.a['href'].split("?", 1)[0])
+
                #check if listing is allready stored
-                if not db.search(Query().epid == rec["epid"]):
-                    result_page_added += 1
-                    db.insert(rec)
+                #if not db.search(Query().epid == rec["epid"]):
+                 #   result_page_added += 1
+                  #  db.insert(rec)

            except (AttributeError, KeyError) as ex:
                pass
        if (result_page_added == 0) or (page_counter == max_pages): 
            last_result_page = True
        page_counter += 1
+        
+    return found_listings

 if __name__ == "__main__":
-    search_db = TinyDB("search_db.json")
-    search_listing("mainboard power pc", search_db, max_pages = 4)
-    print(len(search_db.all()))
+    print(search("mainboard power pc", max_pages = 4))
+