only compare items in same category

Hendrik Schutter 2024-08-18 22:49:48 +02:00
parent a34cbb5f71
commit ebf379a716
6 changed files with 259 additions and 174 deletions

View File

@@ -1,9 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2024/08/18
"""
from bs4 import BeautifulSoup
@@ -18,32 +17,32 @@ import export_html as exhtml

if __name__ == "__main__":
    seller_db = TinyDB("seller_db.json")
    database_length = len(seller_db.all())
    comparison_results = list()
    seller_listing_data = False

    if database_length == 0:
        print("Unable to load seller shop database!")
        sys.exit(-1)
    # print("Loaded seller shop database: " + str(database_lenght) + " listings")

    pbar = tqdm(total=database_length)  # print progress ba
    compare_start_timestamp = datetime.now()  # set start time for comparing

    for shop_listing_url in seller_db.all():
        # print(shop_listing_url["epid"])
        for retrieve_counter in range(5):
            try:
                seller_listing_data = sl.scrape_listing(shop_listing_url["epid"])
                break
            except:
                pass
                # print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])
        if seller_listing_data:
            #print("\n\nCompare: " + seller_listing_data["title"] + " | " + str(seller_listing_data["price"]) + "€ | " + shop_listing_url["epid"])
            comparison_result = {
                "seller_listing": seller_listing_data,
                "competitor_listings": list(),
                "max_price_delta": float(0.0),
            }
            competitor_listings = se.search(seller_listing_data["title"], 1)
@@ -51,68 +50,90 @@ if __name__ == "__main__":
            sys.stdout.flush()
            cheaper_listings = list()
            for competitor_listing_url in competitor_listings:
                print(competitor_listing_url)
                # print(shop_listing_url["epid"])
                if seller_db.search(Query().epid == competitor_listing_url):
                    # print("Found listing from sellers shop --> ignore " + competitor_listing_url)
                    continue
                # else:
                # print("not from own shop")
                competitor_listing_data = False
                for retrieve_counter_competitor in range(5):
                    try:
                        # print("scraping : " + competitor_listing_url)
                        competitor_listing_data = sl.scrape_listing(
                            competitor_listing_url
                        )
                        break
                    except:
                        pass
                        # print("Unable to retrieve competitor listing data from: " + competitor_listing_url)
                if competitor_listing_data:
                    # print(competitor_listing_data["price"])
                    if (competitor_listing_data["category"] != seller_listing_data["category"]):
                        #print("Found competitor_listing is not in same category!")
                        continue
                    if competitor_listing_data["directbuy"] != True:
                        #print("Found competitor_listing is not direct buy!")
                        continue
                    if (competitor_listing_data["price"] < seller_listing_data["price"]):
                        # print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + competitor_listing_url + " Type: "+ str(competitor_listing_data["directbuy"]))
                        cheaper_listings.append(
                            {
                                "title": competitor_listing_data["title"],
                                "price": competitor_listing_data["price"],
                                "image": competitor_listing_data["image"],
                                "url": competitor_listing_url,
                            }
                        )
            for cheaper_listing in sorted(cheaper_listings, key=lambda d: d["price"]):
                # print(cheaper_listing)
                # print("found cheaper competitor: " + str(cheaper_listing["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + cheaper_listing["url"])
                comparison_result["competitor_listings"].append(cheaper_listing)
                if comparison_result["max_price_delta"] == 0.0:
                    comparison_result["max_price_delta"] = (
                        seller_listing_data["price"] - cheaper_listing["price"]
                    )
            if cheaper_listings:
                comparison_results.append(comparison_result)
        pbar.update(1)
        # break
    pbar.close()
    print(
        "\nFinished comparing! Found "
        + str(len(comparison_results))
        + " possibly cheaper listings"
    )
    now = datetime.now()  # current date and time
    duration_compare = datetime.now() - compare_start_timestamp
    hours, remainder = divmod(duration_compare.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    duration_compare = (
        str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
    )
    exp = exhtml.exporter("./html_out/")
    for comparison in sorted(
        comparison_results, key=lambda d: d["max_price_delta"], reverse=True
    ):
        exp.export_comparison(
            comparison["seller_listing"], comparison["competitor_listings"]
        )
    exp.export_startpage(
        str(database_length),
        len(comparison_results),
        duration_compare,
        now.strftime("%m/%d/%Y, %H:%M:%S"),
    )
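
The filtering order this hunk introduces: a competitor listing is dropped before any price check unless its scraped breadcrumb category equals the seller listing's category, and unless it is a direct-buy ("Sofort-Kaufen") offer. A minimal standalone sketch of that logic, using a hypothetical helper name and assuming listing dicts shaped like the ones built above:

def is_comparable(seller_listing, competitor_listing):
    # Hypothetical helper; mirrors the order of checks added in this commit.
    if competitor_listing["category"] != seller_listing["category"]:
        return False  # different breadcrumb path -> never compared
    if not competitor_listing["directbuy"]:
        return False  # auctions are skipped, only direct-buy offers count
    return competitor_listing["price"] < seller_listing["price"]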

View File

@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -11,19 +10,21 @@ import os
import template_html as thtml
import shutil


class exporter:
    export_dir = ""
    tsStart = 0
    counter = 0

    def __init__(self, path):
        self.export_dir = path
        self.tsStart = datetime.now()  # set start time for exporting
        try:
            os.mkdir(self.export_dir)
        except FileExistsError:
            pass
        try:
            os.mkdir(os.path.join(self.export_dir, "compare/"))
        except FileExistsError:
            pass
@@ -31,21 +32,30 @@ class exporter:
    def copy_static_export(self):
        try:
            os.mkdir(os.path.join(self.export_dir, "css/"))
        except FileExistsError:
            pass
        try:
            os.mkdir(os.path.join(self.export_dir, "data/"))
        except FileExistsError:
            pass
        shutil.copy(
            "./html/css/w3.css", os.path.join(self.export_dir, "css/", "w3.css")
        )
        shutil.copy(
            "./html/data/favicon.ico",
            os.path.join(self.export_dir, "data/", "favicon.ico"),
        )
        shutil.copy(
            "./html/data/icon.png", os.path.join(self.export_dir, "data/", "icon.png")
        )

    def export_comparison(self, seller_listing, competitor_listings):
        self.counter += 1
        f = open(
            os.path.join(self.export_dir, "compare/", str(self.counter) + ".html"), "a"
        )
        f.write(thtml.html_comparison_head())
        f.write("<body>")
@@ -54,58 +64,72 @@ class exporter:
        f.write(thtml.html_comparison_competitor_list_header())
        competitor_listing_counter = 0
        for competitor_listing in competitor_listings:
            competitor_listing_counter += 1
            f.write(
                thtml.html_comparison_competitor_listing(
                    competitor_listing, competitor_listing_counter
                )
            )
        f.write(thtml.html_comparison_trailer())
        f.close()

    def export_startpage(
        self, seller_listings_count, cheaper_listings_count, compare_time, date
    ):
        duration_export = datetime.now() - self.tsStart
        hours, remainder = divmod(duration_export.total_seconds(), 3600)
        minutes, seconds = divmod(remainder, 60)
        duration_export = (
            str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
        )
        f = open(os.path.join(self.export_dir, "index.html"), "a")
        f.write(thtml.html_startpage_head())
        f.write(
            thtml.html_startpage_info(
                seller_listings_count,
                cheaper_listings_count,
                compare_time,
                duration_export,
                date,
            )
        )
        f.write(thtml.html_startpage_trailer())
        f.close()


if __name__ == "__main__":
    seller_listing_dummy = {
        "title": "Seller Title",
        "price": float(42.42),
        "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
        "url": "https://www.ebay.de/itm/165508291809",
    }
    competitor_listings_dummy = [
        {
            "title": "Competitor Title 01",
            "price": float(40.42),
            "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
            "url": "https://www.ebay.de/itm/165508291809",
        },
        {
            "title": "Competitor Title 02",
            "price": float(41.42),
            "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
            "url": "https://www.ebay.de/itm/165508291809",
        },
        {
            "title": "Competitor Title 03",
            "price": float(42.00),
            "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
            "url": "https://www.ebay.de/itm/165508291809",
        },
    ]
    exp = exporter("./html_out/")
    exp.export_comparison(seller_listing_dummy, competitor_listings_dummy)
    exp.export_startpage(10, 2, 0, "d")

View File

@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -7,15 +6,16 @@
"""
import random


def get_random_user_agent():
    uastrings = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
    ]
    return random.choice(uastrings) + str(random.randrange(255))

View File

@@ -1,9 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2024/08/18
"""
from bs4 import BeautifulSoup
@@ -15,34 +14,56 @@ import helper
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    user_agent = {"user-agent": helper.get_random_user_agent()}
    # print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, "lxml")


def scrape_listing(url):
    # print ("Web Page: ", url)
    soup = make_soup(url)
    # print(soup)
    # print(soup.find("div", class_="vim x-item-title").span.text)
    # print(soup.find("span", class_="ux-call-to-action__text").text)
    # print(float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')))
    # print(soup.find("img", loading="eager")["src"])
    # print(soup.find("nav", class_="breadcrumbs breadcrumb--overflow").find("li"))
    category = list()
    for span_subcategory in soup.find(
        "nav", class_="breadcrumbs breadcrumb--overflow"
    ).find_all("span"):
        category.append(span_subcategory.text)
    listing = {
        "title": soup.find("div", class_="vim x-item-title").span.text,
        "directbuy": (
            True
            if soup.find("span", class_="ux-call-to-action__text").text
            == "Sofort-Kaufen"
            else False
        ),
        "price": float(
            soup.find("div", class_="x-price-primary")
            .find("span", class_="ux-textspans")
            .text.replace("EUR", "")
            .strip()
            .replace(",", ".")
        ),
        "category": category,
        "image": soup.find("img", loading="eager")["src"],
        "url": url,
    }
    return listing


if __name__ == "__main__":
    # while(1):
    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
    print(listing)
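
The new "category" field is the list of all <span> texts inside the breadcrumb navigation, so the category comparison in the main script is plain Python list equality: both listings must share the exact same breadcrumb path, element for element. An illustrative check with made-up breadcrumb values (not real scraped output):

# Illustrative values only; real entries come from the eBay breadcrumb <nav>.
seller_category = ["Computer, Tablets & Netzwerk", "Netzteile"]
competitor_category = ["Computer, Tablets & Netzwerk", "Netzteile"]
print(seller_category == competitor_category)  # True -> same category, listings are compared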

View File

@@ -1,9 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2024/08/18
"""
from bs4 import BeautifulSoup
@@ -15,19 +14,21 @@ import helper
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    user_agent = {"user-agent": helper.get_random_user_agent()}
    # print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, "lxml")


def search(search_term, max_pages):
    # sort by newest listing
    # display page 1 (first)
    # EU only
    # 60 listings in one result page
    # No auction

    found_listings = set()
@@ -36,24 +37,29 @@ def search(search_term, max_pages):
    while not last_result_page:
        result_page_added = 0
        url = (
            "https://www.ebay.de/sch/i.html?_from=R40&_nkw="
            + search_term.replace(" ", "+")
            + "&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn="
            + str(page_counter)
        )
        # url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sop=10&LH_PrefLoc=1&LH_SellerType=2&LH_BIN=1&_pgn=' + str(page_counter)
        # print ("Web Page: ", url)
        soup = make_soup(url)
        results = soup.find_all("div", class_="s-item__info clearfix")
        for result in results:
            try:
                # rec = {
                #     'epid': result.div.div.div.a['href'].split("?", 1)[0],
                # }
                # print(result)
                found_listings.add(result.a["href"].split("?", 1)[0])
                # check if listing is already stored
                # if not db.search(Query().epid == rec["epid"]):
                #     result_page_added += 1
                #     db.insert(rec)
@@ -65,7 +71,6 @@ def search(search_term, max_pages):
    return found_listings


if __name__ == "__main__":
    print((search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages=4)))
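
For reference, the wrapped concatenation above still builds one flat query string; for the search term used in the __main__ block and page 1 it evaluates to the following (worked out by hand, no new behaviour):

url = (
    "https://www.ebay.de/sch/i.html?_from=R40&_nkw="
    + "LC Power LC6450 V2.2 PC Netzteil 450W Watt".replace(" ", "+")
    + "&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn="
    + str(1)
)
# url == "https://www.ebay.de/sch/i.html?_from=R40&_nkw=LC+Power+LC6450+V2.2+PC+Netzteil+450W+Watt&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn=1"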

View File

@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -19,14 +18,15 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def make_soup(url):
    user_agent = {"user-agent": helper.get_random_user_agent()}
    # print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, "lxml")


def seller_listings(seller_name, db):
    # sort by newest listing
    page_counter = 1
    last_result_page = False
@@ -35,30 +35,45 @@ def seller_listings(seller_name, db):
        result_page_added = 0
        if page_counter == 1:
            url = (
                "https://www.ebay.de/sch/m.html?_ssn="
                + seller_name
                + "&_sop=10&_pgn="
                + str(page_counter)
            )
        else:
            url = (
                "https://www.ebay.de/sch/m.html?_ssn="
                + seller_name
                + "&_sop=10&_pgn="
                + str(page_counter)
                + "&_skc="
                + str(60 * (page_counter - 1))
                + "&rt=nc"
            )
        # print ("Web Page: ", url)
        soup = make_soup(url)
        results = soup.find_all(
            "li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom"
        )
        for result in results:
            # print(result)
            try:
                rec = {
                    "epid": result.a["href"].split("?", 1)[0],
                }
                # check if listing is allready stored
                if not db.search(Query().epid == rec["epid"]):
                    result_page_added += 1
                    db.insert(rec)
            except (AttributeError, KeyError) as ex:
                pass
        if result_page_added == 0:
            last_result_page = True
        page_counter += 1
@@ -67,4 +82,3 @@ if __name__ == "__main__":
    seller_db = TinyDB("seller_db.json")
    seller_listings("electro-network*com", seller_db)
    print(len(seller_db.all()))