only compare items in same category

This commit is contained in:
2024-08-18 22:49:48 +02:00
parent a34cbb5f71
commit ebf379a716
6 changed files with 259 additions and 174 deletions

View File

@ -1,9 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/05/31
Date of last modification: 2024/08/18
"""
from bs4 import BeautifulSoup
@ -15,34 +14,56 @@ import helper
# Silence urllib3's InsecureRequestWarning for the whole process so that
# scraping output is not cluttered with certificate warnings.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def make_soup(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    The request is a plain GET sent with a user agent obtained from
    ``helper.get_random_user_agent()``. The HTTP status code is not
    inspected: whatever body the server returns is handed to the parser.

    Args:
        url: Address of the page to fetch.

    Returns:
        BeautifulSoup: The response body parsed with the ``lxml`` parser.
    """
    headers = {"user-agent": helper.get_random_user_agent()}
    # A fresh pool manager is created per call; the ``with`` block clears its
    # connection pools on exit so sockets do not linger until garbage
    # collection.
    with urllib3.PoolManager(10, headers=headers) as http:
        response = http.request("GET", url)
        # The body is preloaded by request(), so it is safe to keep using it
        # after the pool has been cleared.
        body = response.data
    return BeautifulSoup(body, "lxml")
def _parse_price(price_text):
    """Convert a price label such as ``"EUR 1.234,56"`` into a float.

    Args:
        price_text: Raw text of the price element; may contain the ``EUR``
            marker and surrounding whitespace.

    Returns:
        float: The numeric price.

    Raises:
        ValueError: If the remaining text is not a parsable number.
    """
    number = price_text.replace("EUR", "").strip()
    # When the last "," comes after the last ".", the dots are thousands
    # separators (German notation). They must be dropped first, otherwise
    # "1.234,56" turns into "1.234.56" and float() rejects it.
    if number.rfind(",") > number.rfind("."):
        number = number.replace(".", "")
    return float(number.replace(",", "."))


def scrape_listing(url):
    """Scrape a single listing page and return its key attributes.

    The page is fetched through ``make_soup`` and the values are read from
    fixed elements/CSS classes of the item page. None of the lookups is
    checked for presence, so a page that lacks one of the expected elements
    makes this function fail with an exception instead of returning partial
    data (presumably ``AttributeError``/``TypeError`` from operating on a
    missing element -- verify against the bs4 version in use).

    Args:
        url: Address of the listing page.

    Returns:
        dict: A mapping with the keys
            ``title`` (str), ``directbuy`` (bool, True when the call-to-action
            text equals ``"Sofort-Kaufen"``), ``price`` (float),
            ``category`` (list of str, the texts of all ``span`` elements in
            the breadcrumb navigation, in document order), ``image`` (the
            ``src`` of the eagerly loaded image) and ``url`` (the input URL).
    """
    soup = make_soup(url)

    # Breadcrumb trail of the listing, outermost category first.
    breadcrumb_nav = soup.find("nav", class_="breadcrumbs breadcrumb--overflow")
    category = [span.text for span in breadcrumb_nav.find_all("span")]

    return {
        "title": soup.find("div", class_="vim x-item-title").span.text,
        "directbuy": (
            soup.find("span", class_="ux-call-to-action__text").text
            == "Sofort-Kaufen"
        ),
        "price": _parse_price(
            soup.find("div", class_="x-price-primary")
            .find("span", class_="ux-textspans")
            .text
        ),
        "category": category,
        "image": soup.find("img", loading="eager")["src"],
        "url": url,
    }
if __name__ == "__main__":
    # Manual smoke test: scrape one known listing and dump the result.
    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
    print(listing)