only compare items in same category

parent a34cbb5f71
commit ebf379a716

compare.py (135 lines changed)
@@ -1,9 +1,8 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
     Date of creation: 2022/05/31
-    Date of last modification: 2022/05/31
+    Date of last modification: 2024/08/18
 """

 from bs4 import BeautifulSoup
@@ -18,101 +17,123 @@ import export_html as exhtml

 if __name__ == "__main__":
     seller_db = TinyDB("seller_db.json")
-    database_lenght = len(seller_db.all())
+    database_length = len(seller_db.all())
     comparison_results = list()
     seller_listing_data = False
-    if database_lenght == 0:
+    if database_length == 0:
         print("Unable to load seller shop database!")
         sys.exit(-1)
-    #print("Loaded seller shop database: " + str(database_lenght) + " listings")
-    pbar = tqdm(total=database_lenght) #print progress bar
-    compare_start_timestamp = datetime.now() #set start time for comparing
+    # print("Loaded seller shop database: " + str(database_lenght) + " listings")
+    pbar = tqdm(total=database_length)  # print progress bar
+    compare_start_timestamp = datetime.now()  # set start time for comparing
     for shop_listing_url in seller_db.all():
-        #print(shop_listing_url["epid"])
-        for retrieve_counter in range(5):
+        # print(shop_listing_url["epid"])
+        for retrieve_counter in range(5):
             try:
                 seller_listing_data = sl.scrape_listing(shop_listing_url["epid"])
                 break
             except:
                 pass
-                #print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])
+                # print("Unable to retrieve seller listing data from: " + shop_listing_url["epid"])

         if seller_listing_data:
-            #print("\n\nCompare: " + seller_listing_data["title"] + " | " + str(seller_listing_data["price"]) + "€ | " + shop_listing_url["epid"])

             comparison_result = {
-                'seller_listing': seller_listing_data,
-                'competitor_listings': list(),
-                'max_price_delta': float(0.0)
+                "seller_listing": seller_listing_data,
+                "competitor_listings": list(),
+                "max_price_delta": float(0.0),
             }

             competitor_listings = se.search(seller_listing_data["title"], 1)
             #print("Found " + str(len(competitor_listings)) + " listings from competitors with term: " + seller_listing_data["title"])
             sys.stdout.flush()
             cheaper_listings = list()
             for competitor_listing_url in competitor_listings:
-                #print(competitor_listing_url)
-                #print(shop_listing_url["epid"])
+                print(competitor_listing_url)
+                # print(shop_listing_url["epid"])

                 if seller_db.search(Query().epid == competitor_listing_url):
-                    #print("Found listing from sellers shop --> ignore " + competitor_listing_url)
+                    # print("Found listing from sellers shop --> ignore " + competitor_listing_url)
                     continue
-                #else:
-                    #print("not from own shop")
+                # else:
+                # print("not from own shop")

-                for retrieve_counter_competitor in range(5):
+                competitor_listing_data = False
+                for retrieve_counter_competitor in range(5):
                     try:
-                        #print("scraping : " + competitor_listing_url)
-                        competitor_listing_data = sl.scrape_listing(competitor_listing_url)
+                        # print("scraping : " + competitor_listing_url)
+                        competitor_listing_data = sl.scrape_listing(
+                            competitor_listing_url
+                        )
                         break
                     except:
                         pass
-                        #print("Unable to retrieve competitor listing data from: " + competitor_listing_url)
+                        # print("Unable to retrieve competitor listing data from: " + competitor_listing_url)

                 if competitor_listing_data:
-                    #print(competitor_listing_data["price"])
-                    if (competitor_listing_data["price"] < seller_listing_data["price"]) and (competitor_listing_data["directbuy"] == True):
-                        #print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + competitor_listing_url + " Type: "+ str(competitor_listing_data["directbuy"]))
-                        cheaper_listings.append({
-                            'title': competitor_listing_data["title"],
-                            'price': competitor_listing_data["price"],
-                            'image': competitor_listing_data["image"],
-                            'url': competitor_listing_url})
+                    # print(competitor_listing_data["price"])
+
+                    if competitor_listing_data["category"] != seller_listing_data["category"]:
+                        # print("Found competitor_listing is not in same category!")
+                        continue
+
+                    if competitor_listing_data["directbuy"] != True:
+                        # print("Found competitor_listing is not direct buy!")
+                        continue
+
+                    if competitor_listing_data["price"] < seller_listing_data["price"]:
+                        # print("found cheaper competitor: " + str(competitor_listing_data["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + competitor_listing_url + " Type: "+ str(competitor_listing_data["directbuy"]))
+                        cheaper_listings.append(
+                            {
+                                "title": competitor_listing_data["title"],
+                                "price": competitor_listing_data["price"],
+                                "image": competitor_listing_data["image"],
+                                "url": competitor_listing_url,
+                            }
+                        )

-            for cheaper_listing in sorted(cheaper_listings, key=lambda d: d['price']) :
-                #print(cheaper_listing)
-                #print("found cheaper competitor: " + str(cheaper_listing["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + cheaper_listing["url"])
-                comparison_result['competitor_listings'].append(cheaper_listing)
-                if comparison_result['max_price_delta'] == 0.0:
-                    comparison_result['max_price_delta'] = seller_listing_data["price"] - cheaper_listing["price"]
+            for cheaper_listing in sorted(cheaper_listings, key=lambda d: d["price"]):
+                # print(cheaper_listing)
+                # print("found cheaper competitor: " + str(cheaper_listing["price"]) + "€ instead: " + str(seller_listing_data["price"]) + "€ ---> " + cheaper_listing["url"])
+                comparison_result["competitor_listings"].append(cheaper_listing)
+                if comparison_result["max_price_delta"] == 0.0:
+                    comparison_result["max_price_delta"] = (
+                        seller_listing_data["price"] - cheaper_listing["price"]
+                    )

             if cheaper_listings:
                 comparison_results.append(comparison_result)
         pbar.update(1)
-        #break
+        # break
     pbar.close()
-    print("\nFinished comparing! Found " + str(len(comparison_results)) + " possibly cheaper listings")
+    print(
+        "\nFinished comparing! Found "
+        + str(len(comparison_results))
+        + " possibly cheaper listings"
+    )

     now = datetime.now()  # current date and time

     duration_compare = datetime.now() - compare_start_timestamp
     hours, remainder = divmod(duration_compare.total_seconds(), 3600)
     minutes, seconds = divmod(remainder, 60)
-    duration_compare = str(hours) +"h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
+    duration_compare = (
+        str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
+    )

     exp = exhtml.exporter("./html_out/")

-    for comparison in sorted(comparison_results, key=lambda d: d['max_price_delta'], reverse=True):
-        exp.export_comparison(comparison['seller_listing'], comparison['competitor_listings'])
+    for comparison in sorted(
+        comparison_results, key=lambda d: d["max_price_delta"], reverse=True
+    ):
+        exp.export_comparison(
+            comparison["seller_listing"], comparison["competitor_listings"]
+        )

-    exp.export_startpage(str(database_lenght), len(comparison_results), duration_compare, now.strftime("%m/%d/%Y, %H:%M:%S"))
+    exp.export_startpage(
+        str(database_length),
+        len(comparison_results),
+        duration_compare,
+        now.strftime("%m/%d/%Y, %H:%M:%S"),
+    )
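The net change in compare.py: a competitor listing now has to clear three checks, in order, before it counts as cheaper — same breadcrumb category, direct buy ("Sofort-Kaufen"), lower price. A minimal standalone sketch of that filter (is_cheaper_competitor is a hypothetical helper, not part of the repo):

def is_cheaper_competitor(seller, competitor):
    # The category lists come from scrape_listing()'s breadcrumb scrape (see below).
    if competitor["category"] != seller["category"]:
        return False
    # Auctions are skipped; only direct-buy offers are comparable.
    if not competitor["directbuy"]:
        return False
    # Last check: the competitor must actually undercut the seller.
    return competitor["price"] < seller["price"]

seller = {"category": ["Netzteile"], "directbuy": True, "price": 42.42}
rival = {"category": ["Netzteile"], "directbuy": True, "price": 40.42}
print(is_cheaper_competitor(seller, rival))  # True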
export_html.py (108 lines changed)
@@ -1,4 +1,3 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -11,41 +10,52 @@ import os
 import template_html as thtml
 import shutil


 class exporter:
-    export_dir=""
+    export_dir = ""
     tsStart = 0
     counter = 0

     def __init__(self, path):
         self.export_dir = path
-        self.tsStart = datetime.now() #set start time for exporting
+        self.tsStart = datetime.now()  # set start time for exporting
         try:
             os.mkdir(self.export_dir)
         except FileExistsError:
             pass
         try:
-            os.mkdir(os.path.join(self.export_dir,"compare/"))
+            os.mkdir(os.path.join(self.export_dir, "compare/"))
         except FileExistsError:
             pass

         self.copy_static_export()

     def copy_static_export(self):
         try:
-            os.mkdir(os.path.join(self.export_dir,"css/"))
+            os.mkdir(os.path.join(self.export_dir, "css/"))
         except FileExistsError:
             pass
-        try:
-            os.mkdir(os.path.join(self.export_dir,"data/"))
+        try:
+            os.mkdir(os.path.join(self.export_dir, "data/"))
         except FileExistsError:
             pass

-        shutil.copy("./html/css/w3.css", os.path.join(self.export_dir,"css/","w3.css"))
-        shutil.copy("./html/data/favicon.ico", os.path.join(self.export_dir,"data/","favicon.ico"))
-        shutil.copy("./html/data/icon.png", os.path.join(self.export_dir,"data/","icon.png"))
+        shutil.copy(
+            "./html/css/w3.css", os.path.join(self.export_dir, "css/", "w3.css")
+        )
+        shutil.copy(
+            "./html/data/favicon.ico",
+            os.path.join(self.export_dir, "data/", "favicon.ico"),
+        )
+        shutil.copy(
+            "./html/data/icon.png", os.path.join(self.export_dir, "data/", "icon.png")
+        )

     def export_comparison(self, seller_listing, competitor_listings):
-        self.counter +=1
-        f = open(os.path.join(self.export_dir, "compare/", str(self.counter) + ".html"), "a")
+        self.counter += 1
+        f = open(
+            os.path.join(self.export_dir, "compare/", str(self.counter) + ".html"), "a"
+        )

         f.write(thtml.html_comparison_head())
         f.write("<body>")
@@ -54,58 +64,72 @@ class exporter:
         f.write(thtml.html_comparison_competitor_list_header())
         competitor_listing_counter = 0
         for competitor_listing in competitor_listings:
-            competitor_listing_counter +=1
-            f.write(thtml.html_comparison_competitor_listing(competitor_listing, competitor_listing_counter))
+            competitor_listing_counter += 1
+            f.write(
+                thtml.html_comparison_competitor_listing(
+                    competitor_listing, competitor_listing_counter
+                )
+            )
         f.write(thtml.html_comparison_trailer())
         f.close()

-    def export_startpage(self, seller_listings_count, cheaper_listings_count, compare_time, date):
+    def export_startpage(
+        self, seller_listings_count, cheaper_listings_count, compare_time, date
+    ):

         duration_export = datetime.now() - self.tsStart
         hours, remainder = divmod(duration_export.total_seconds(), 3600)
         minutes, seconds = divmod(remainder, 60)
-        duration_export = str(hours) +"h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
+        duration_export = (
+            str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"
+        )

         f = open(os.path.join(self.export_dir, "index.html"), "a")
         f.write(thtml.html_startpage_head())
-        f.write(thtml.html_startpage_info(seller_listings_count, cheaper_listings_count, compare_time, duration_export, date))
+        f.write(
+            thtml.html_startpage_info(
+                seller_listings_count,
+                cheaper_listings_count,
+                compare_time,
+                duration_export,
+                date,
+            )
+        )
         f.write(thtml.html_startpage_trailer())
         f.close()


 if __name__ == "__main__":

     seller_listing_dummy = {
-        'title': "Seller Title",
-        'price': float(42.42),
-        'image': "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
-        'url' : "https://www.ebay.de/itm/165508291809"
+        "title": "Seller Title",
+        "price": float(42.42),
+        "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
+        "url": "https://www.ebay.de/itm/165508291809",
     }

     competitor_listings_dummy = [
         {
-            'title': "Competitor Title 01",
-            'price': float(40.42),
-            'image': "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
-            'url' : "https://www.ebay.de/itm/165508291809"
+            "title": "Competitor Title 01",
+            "price": float(40.42),
+            "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
+            "url": "https://www.ebay.de/itm/165508291809",
         },
         {
-            'title': "Competitor Title 02",
-            'price': float(41.42),
-            'image': "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
-            'url' : "https://www.ebay.de/itm/165508291809"
+            "title": "Competitor Title 02",
+            "price": float(41.42),
+            "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
+            "url": "https://www.ebay.de/itm/165508291809",
         },
         {
-            'title': "Competitor Title 03",
-            'price': float(42.00),
-            'image': "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
-            'url' : "https://www.ebay.de/itm/165508291809"
-        }
+            "title": "Competitor Title 03",
+            "price": float(42.00),
+            "image": "https://i.ebayimg.com/images/g/7lAAAOSw~ixieBVP/s-l500.jpg",
+            "url": "https://www.ebay.de/itm/165508291809",
+        },
     ]

     exp = exporter("./html_out/")
     exp.export_comparison(seller_listing_dummy, competitor_listings_dummy)

     exp.export_startpage(10, 2, 0, "d")
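Both compare.py and export_html.py format a timedelta with the same divmod chain. Pulled out as a sketch (format_duration is a hypothetical name, not in the repo), it behaves like this — note that divmod on floats yields floats, so hours and minutes print as e.g. "1.0h 2.0m":

from datetime import timedelta

def format_duration(delta: timedelta) -> str:
    # Split total seconds into whole hours, then the remainder into minutes/seconds.
    hours, remainder = divmod(delta.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)
    return str(hours) + "h " + str(minutes) + "m " + str(round(seconds, 2)) + "s"

print(format_duration(timedelta(hours=1, minutes=2, seconds=3.5)))  # 1.0h 2.0m 3.5s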
helper.py (22 lines changed)
@@ -1,4 +1,3 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -7,15 +6,16 @@
 """
 import random


 def get_random_user_agent():
     uastrings = [
-        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",\
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",\
-        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",\
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",\
-        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",\
-        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",\
-        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0"\
-    ]
-
-    return random.choice(uastrings)+str(random.randrange(255))
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
+        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
+        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
+        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
+    ]

+    return random.choice(uastrings) + str(random.randrange(255))
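For context, get_random_user_agent() does not return one of the base strings verbatim: it appends a random integer below 255, presumably to vary the header between requests. A self-contained sketch of the same idea:

import random

# Mirrors helper.get_random_user_agent(): pick a base UA, append a random suffix.
base = "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0"
print(base + str(random.randrange(255)))  # e.g. "...Firefox/33.0217"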
@@ -1,9 +1,8 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
     Date of creation: 2022/05/31
-    Date of last modification: 2022/05/31
+    Date of last modification: 2024/08/18
 """

 from bs4 import BeautifulSoup
@@ -15,34 +14,56 @@ import helper

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


 def make_soup(url):
-    user_agent = {'user-agent': helper.get_random_user_agent()}
-    #print(user_agent)
+    user_agent = {"user-agent": helper.get_random_user_agent()}
+    # print(user_agent)
     http = urllib3.PoolManager(10, headers=user_agent)
     r = http.request("GET", url)
-    return BeautifulSoup(r.data,'lxml')
+    return BeautifulSoup(r.data, "lxml")


 def scrape_listing(url):
-    #print ("Web Page: ", url)
+    # print ("Web Page: ", url)

     soup = make_soup(url)
-    #print(soup)
-    #print(soup.find("div", class_="vim x-item-title").span.text)
-    #print(soup.find("span", class_="ux-call-to-action__text").text)
-    #print(float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')))
-    #print(soup.find("img", loading="eager")["src"])
+    # print(soup)
+    # print(soup.find("div", class_="vim x-item-title").span.text)
+    # print(soup.find("span", class_="ux-call-to-action__text").text)
+    # print(float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')))
+    # print(soup.find("img", loading="eager")["src"])
+
+    # print(soup.find("nav", class_="breadcrumbs breadcrumb--overflow").find("li"))
+
+    category = list()
+    for span_subcategory in soup.find(
+        "nav", class_="breadcrumbs breadcrumb--overflow"
+    ).find_all("span"):
+        category.append(span_subcategory.text)

     listing = {
-        'title': soup.find("div", class_="vim x-item-title").span.text,
-        'directbuy' : True if soup.find("span", class_="ux-call-to-action__text").text == "Sofort-Kaufen" else False,
-        'price': float(soup.find('div', class_='x-price-primary').find('span', class_='ux-textspans').text.replace("EUR", "").strip().replace(',', '.')),
-        'image': soup.find("img", loading="eager")["src"],
-        'url' : url
+        "title": soup.find("div", class_="vim x-item-title").span.text,
+        "directbuy": (
+            True
+            if soup.find("span", class_="ux-call-to-action__text").text
+            == "Sofort-Kaufen"
+            else False
+        ),
+        "price": float(
+            soup.find("div", class_="x-price-primary")
+            .find("span", class_="ux-textspans")
+            .text.replace("EUR", "")
+            .strip()
+            .replace(",", ".")
+        ),
+        "category": category,
+        "image": soup.find("img", loading="eager")["src"],
+        "url": url,
     }
     return listing

-if __name__ == "__main__":
-    #while(1):
-    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
-    print(listing)
+
+if __name__ == "__main__":
+    # while(1):
+    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
+    print(listing)
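The new "category" field is the list of breadcrumb texts, which is what makes the commit's same-category comparison possible. A sketch of the same extraction against static markup (the HTML here is hypothetical, shaped like what scrape_listing() expects; requires bs4 and lxml):

from bs4 import BeautifulSoup

html = """
<nav class="breadcrumbs breadcrumb--overflow">
  <li><a><span>Computer, Tablets &amp; Netzwerk</span></a></li>
  <li><a><span>Netzteile</span></a></li>
</nav>
"""
soup = BeautifulSoup(html, "lxml")
category = [
    span.text
    for span in soup.find(
        "nav", class_="breadcrumbs breadcrumb--overflow"
    ).find_all("span")
]
print(category)  # ['Computer, Tablets & Netzwerk', 'Netzteile']

Because compare.py compares the whole list with !=, two listings only match if their full breadcrumb paths are identical, not just the leaf category.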
@@ -1,9 +1,8 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
     Date of creation: 2022/05/31
-    Date of last modification: 2022/05/31
+    Date of last modification: 2024/08/18
 """

 from bs4 import BeautifulSoup
@@ -15,57 +14,63 @@ import helper

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


 def make_soup(url):
-    user_agent = {'user-agent': helper.get_random_user_agent()}
-    #print(user_agent)
+    user_agent = {"user-agent": helper.get_random_user_agent()}
+    # print(user_agent)
     http = urllib3.PoolManager(10, headers=user_agent)
     r = http.request("GET", url)
-    return BeautifulSoup(r.data,'lxml')
+    return BeautifulSoup(r.data, "lxml")


 def search(search_term, max_pages):
-    #sort by newest listing
-    #display page 1 (first)
-    #EU only
-    #60 listings in one result page
-    #No auction
+    # sort by newest listing
+    # display page 1 (first)
+    # EU only
+    # 60 listings in one result page
+    # No auction

     found_listings = set()

     page_counter = 1
     last_result_page = False

     while not last_result_page:
         result_page_added = 0
-        url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn=' + str(page_counter)
-        #url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sop=10&LH_PrefLoc=1&LH_SellerType=2&LH_BIN=1&_pgn=' + str(page_counter)
+        url = (
+            "https://www.ebay.de/sch/i.html?_from=R40&_nkw="
+            + search_term.replace(" ", "+")
+            + "&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn="
+            + str(page_counter)
+        )
+        # url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + search_term.replace(" ", "+") + '&_sop=10&LH_PrefLoc=1&LH_SellerType=2&LH_BIN=1&_pgn=' + str(page_counter)

-        #print ("Web Page: ", url)
+        # print ("Web Page: ", url)

         soup = make_soup(url)
         results = soup.find_all("div", class_="s-item__info clearfix")

         for result in results:
             try:
-                #rec = {
-                #    'epid': result.div.div.div.a['href'].split("?", 1)[0],
-                #}
-                #print(result)
-                found_listings.add(result.a['href'].split("?", 1)[0])
+                # rec = {
+                #     'epid': result.div.div.div.a['href'].split("?", 1)[0],
+                # }
+                # print(result)
+                found_listings.add(result.a["href"].split("?", 1)[0])

-                #check if listing is allready stored
-                #if not db.search(Query().epid == rec["epid"]):
-                #    result_page_added += 1
-                #    db.insert(rec)
+                # check if listing is already stored
+                # if not db.search(Query().epid == rec["epid"]):
+                #     result_page_added += 1
+                #     db.insert(rec)

             except (AttributeError, KeyError) as ex:
                 pass
         if (result_page_added == 0) or (page_counter == max_pages):
             last_result_page = True
         page_counter += 1

     return found_listings


 if __name__ == "__main__":
-    print((search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages = 4)))
+    print((search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages=4)))
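The reformatted search URL is still plain string concatenation; the query parameters follow the comments at the top of search() (LH_BIN=1 excludes auctions, LH_PrefLoc=3 restricts to the EU, _pgn selects the result page). An equivalent sketch using urllib.parse — build_search_url is hypothetical, but urlencode also turns spaces into "+", so it yields the same shape of URL:

from urllib.parse import urlencode

def build_search_url(search_term: str, page: int) -> str:
    params = {
        "_from": "R40",
        "_nkw": search_term,  # spaces become "+" via urlencode
        "_sacat": 0,
        "LH_TitleDesc": 0,
        "LH_BIN": 1,          # direct buy only, no auctions
        "rt": "nc",
        "LH_PrefLoc": 3,      # EU only
        "_pgn": page,         # result page number
    }
    return "https://www.ebay.de/sch/i.html?" + urlencode(params)

print(build_search_url("LC Power LC6450 V2.2 PC Netzteil 450W Watt", 1))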
@@ -1,4 +1,3 @@
-
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Author: Hendrik Schutter, mail@hendrikschutter.com
@@ -19,46 +18,62 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


 def make_soup(url):
-    user_agent = {'user-agent': helper.get_random_user_agent()}
-    #print(user_agent)
+    user_agent = {"user-agent": helper.get_random_user_agent()}
+    # print(user_agent)
     http = urllib3.PoolManager(10, headers=user_agent)
     r = http.request("GET", url)
-    return BeautifulSoup(r.data,'lxml')
+    return BeautifulSoup(r.data, "lxml")


 def seller_listings(seller_name, db):
-    #sort by newest listing
+    # sort by newest listing

     page_counter = 1
     last_result_page = False

     while not last_result_page:
         result_page_added = 0

         if page_counter == 1:
-            url = 'https://www.ebay.de/sch/m.html?_ssn='+ seller_name + '&_sop=10&_pgn='+ str(page_counter)
+            url = (
+                "https://www.ebay.de/sch/m.html?_ssn="
+                + seller_name
+                + "&_sop=10&_pgn="
+                + str(page_counter)
+            )
         else:
-            url = 'https://www.ebay.de/sch/m.html?_ssn='+ seller_name + '&_sop=10&_pgn='+ str(page_counter) + '&_skc=' + str(60*(page_counter-1)) +'&rt=nc'
+            url = (
+                "https://www.ebay.de/sch/m.html?_ssn="
+                + seller_name
+                + "&_sop=10&_pgn="
+                + str(page_counter)
+                + "&_skc="
+                + str(60 * (page_counter - 1))
+                + "&rt=nc"
+            )

-        #print ("Web Page: ", url)
+        # print ("Web Page: ", url)

         soup = make_soup(url)
-        results = soup.find_all("li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom")
+        results = soup.find_all(
+            "li", class_="s-item s-item__dsa-on-bottom s-item__pl-on-bottom"
+        )

         for result in results:
-            #print(result)
+            # print(result)
             try:
                 rec = {
-                    'epid': result.a['href'].split("?", 1)[0],
+                    "epid": result.a["href"].split("?", 1)[0],
                 }

-                #check if listing is allready stored
+                # check if listing is allready stored
                 if not db.search(Query().epid == rec["epid"]):
                     result_page_added += 1
                     db.insert(rec)
             except (AttributeError, KeyError) as ex:
                 pass

-        if (result_page_added == 0):
+        if result_page_added == 0:
             last_result_page = True
         page_counter += 1

@@ -67,4 +82,3 @@ if __name__ == "__main__":
     seller_db = TinyDB("seller_db.json")
     seller_listings("electro-network*com", seller_db)
     print(len(seller_db.all()))
-
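Pagination in seller_listings() works by offset: page 1 uses no _skc parameter, every later page passes _skc = 60 * (page_counter - 1), i.e. the number of listings already shown at 60 per result page, and the loop stops once a page adds nothing new to the TinyDB. The offset arithmetic as a quick check (skip_count is a hypothetical helper):

def skip_count(page: int, per_page: int = 60) -> int:
    # Listings to skip before the requested result page starts.
    return per_page * (page - 1)

for page in (1, 2, 3):
    print(page, skip_count(page))  # 1 0 / 2 60 / 3 120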