basic compare

parent b9d8774916
commit 50f1edd449

compare.py (new file, 70 lines)
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2022/05/31
"""

from bs4 import BeautifulSoup
import datetime
from tinydb import TinyDB, Query
import urllib3
import sys

import scrape_listing as sl
import search_eBay as se

if __name__ == "__main__":
    seller_db = TinyDB("seller_db.json")
    database_length = len(seller_db.all())
    if database_length == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)
    print("Loaded seller shop database: " + str(database_length) + " listings")

    for shop_listing_url in seller_db.all():
        # retry up to five times; stays None if every attempt fails
        shop_listing_data = None
        for retrieve_counter in range(5):
            try:
                shop_listing_data = sl.scrape_listing(shop_listing_url["epid"])
                break
            except Exception:
                print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])

        if shop_listing_data:
            print("\n\nCompare: " + shop_listing_data["title"] + " | " + str(shop_listing_data["price"]) + "€ | " + shop_listing_url["epid"])

            competitor_listings = se.search(shop_listing_data["title"], 1)
            print("Found " + str(len(competitor_listings)) + " listings from competitors with term: " + shop_listing_data["title"])
            sys.stdout.flush()

            for competitor_listing_url in competitor_listings:
                if competitor_listing_url == shop_listing_url["epid"]:
                    # listing from the seller's own shop --> skip it, keep checking the rest
                    continue

                # reset so a failed scrape cannot reuse the previous competitor's data
                competitor_listing_data = None
                for retrieve_counter_competitor in range(5):
                    try:
                        competitor_listing_data = sl.scrape_listing(competitor_listing_url)
                        break
                    except Exception:
                        pass

                if competitor_listing_data:
                    if competitor_listing_data["price"] < shop_listing_data["price"]:
                        print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(shop_listing_data["price"]) + "€ ---> " + competitor_listing_url)
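Both retry loops in compare.py repeat the same five-attempt pattern; a minimal sketch of how it could be factored out, assuming sl.scrape_listing raises on failure (the helper scrape_with_retries is hypothetical, not part of this commit):

import scrape_listing as sl  # same import compare.py uses

def scrape_with_retries(url, attempts=5):
    # retry the scrape a fixed number of times; None signals total failure
    for _ in range(attempts):
        try:
            return sl.scrape_listing(url)  # assumed to raise on network errors
        except Exception:
            pass
    return None  # callers keep their existing `if data:` checks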
scrape_listing.py
@@ -23,7 +23,7 @@ def make_soup(url):
     return BeautifulSoup(r.data,'lxml')

 def scrape_listing(url):
-    print ("Web Page: ", url)
+    #print ("Web Page: ", url)

     soup = make_soup(url)

@@ -38,6 +38,6 @@ def scrape_listing(url):
     return listing

 if __name__ == "__main__":
-    listing = scrape_listing("https://www.ebay.de/itm/165445016341")
+    listing = scrape_listing("https://www.ebay.de/itm/165508291809")
     print(listing)
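For reference, compare.py reads only two keys of the dict that scrape_listing returns; a minimal usage sketch under that assumption (the full field list of listing is not visible in this diff):

import scrape_listing  # module name inferred from compare.py's import

listing = scrape_listing.scrape_listing("https://www.ebay.de/itm/165508291809")
print(listing["title"])  # used as the search term for search_eBay.search()
print(listing["price"])  # numeric, compared against competitor prices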
search_eBay.py
@@ -22,13 +22,15 @@ def make_soup(url):
     r = http.request("GET", url)
     return BeautifulSoup(r.data,'lxml')

-def search_listing(search_term, db, max_pages):
+def search(search_term, max_pages):
     #sort by newest listing
     #display page 1 (first)
     #EU only
     #60 listings in one result page
     #No auction

+    found_listings = set()
+
     page_counter = 1
     last_result_page = False

@@ -43,22 +45,26 @@ def search_listing(search_term, db, max_pages):

         for result in results:
             try:
-                rec = {
-                    'epid': result.div.div.div.a['href'].split("?", 1)[0],
-                }
-
-                #check if listing is allready stored
-                if not db.search(Query().epid == rec["epid"]):
-                    result_page_added += 1
-                    db.insert(rec)
+                epid = result.div.div.div.a['href'].split("?", 1)[0]
+
+                #check if listing is already stored; still count new ones so
+                #the last-page check below keeps terminating the loop
+                if epid not in found_listings:
+                    result_page_added += 1
+                    found_listings.add(epid)

             except (AttributeError, KeyError) as ex:
                 pass
         if (result_page_added == 0) or (page_counter == max_pages):
             last_result_page = True
         page_counter += 1

+    return found_listings

 if __name__ == "__main__":
-    search_db = TinyDB("search_db.json")
-    search_listing("mainboard power pc", search_db, max_pages = 4)
-    print(len(search_db.all()))
+    print(search("mainboard power pc", max_pages = 4))
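Since search() now returns a set of listing URLs instead of inserting records into a TinyDB, callers no longer need a database handle; a minimal sketch, assuming the import search_eBay as se alias from compare.py:

import search_eBay as se

# the set already deduplicates repeat hits across result pages
urls = se.search("mainboard power pc", max_pages=1)
print(str(len(urls)) + " competitor listings found")
for url in urls:
    print(url)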