only compare items in same category
This commit is contained in:
@ -1,9 +1,8 @@
|
||||
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||
Date of creation: 2022/05/31
|
||||
Date of last modification: 2022/05/31
|
||||
Date of last modification: 2024/08/18
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
@ -15,34 +14,56 @@ import helper
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
def make_soup(url):
    """Download ``url`` and return its body parsed by BeautifulSoup.

    The request carries a ``user-agent`` header whose value comes from
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to fetch with an HTTP GET request.

    Returns:
        BeautifulSoup: The response body parsed with the ``lxml`` parser.
    """
    user_agent = {"user-agent": helper.get_random_user_agent()}

    # A fresh pool manager is built on every call; the ``with`` block clears
    # its connection pools once the response body has been read and parsed.
    with urllib3.PoolManager(10, headers=user_agent) as http:
        r = http.request("GET", url)
        return BeautifulSoup(r.data, "lxml")
|
||||
|
||||
|
||||
def _parse_price(text):
    """Convert a displayed price such as ``"EUR 1.299,00"`` into a float.

    The right-most ``,`` or ``.`` is taken as the decimal mark and the other
    character as a grouping separator, so both ``"1.299,00"`` and
    ``"1,299.00"`` yield ``1299.0``. Text with at most one separator is
    parsed exactly as a plain comma-to-dot replacement would parse it.
    """
    cleaned = text.replace("EUR", "").strip()
    last_comma = cleaned.rfind(",")
    last_dot = cleaned.rfind(".")
    if last_comma > last_dot:
        # Comma is the decimal mark; any dots are thousands separators.
        cleaned = cleaned.replace(".", "").replace(",", ".")
    elif last_comma != -1:
        # Dot is the decimal mark; commas are thousands separators.
        cleaned = cleaned.replace(",", "")
    return float(cleaned)


def scrape_listing(url):
    """Scrape a single listing page and return its key attributes.

    Args:
        url: Address of the listing page; passed to ``make_soup``.

    Returns:
        dict: A mapping with the keys
            ``title`` (str): text of the item title span,
            ``directbuy`` (bool): whether the call-to-action text equals
            "Sofort-Kaufen",
            ``price`` (float): primary price with the "EUR" marker removed,
            ``category`` (list of str): text of every ``span`` inside the
            breadcrumb navigation, in document order,
            ``image`` (str): ``src`` of the eagerly loaded image,
            ``url``: the ``url`` argument, unchanged.

    Note:
        No element lookup is guarded: if the page lacks one of the expected
        elements, the resulting exception propagates to the caller.
    """
    soup = make_soup(url)

    breadcrumbs = soup.find("nav", class_="breadcrumbs breadcrumb--overflow")
    category = [span.text for span in breadcrumbs.find_all("span")]

    call_to_action = soup.find("span", class_="ux-call-to-action__text")
    price_text = (
        soup.find("div", class_="x-price-primary")
        .find("span", class_="ux-textspans")
        .text
    )

    listing = {
        "title": soup.find("div", class_="vim x-item-title").span.text,
        "directbuy": call_to_action.text == "Sofort-Kaufen",
        "price": _parse_price(price_text),
        "category": category,
        "image": soup.find("img", loading="eager")["src"],
        "url": url,
    }
    return listing
|
||||
|
||||
if __name__ == "__main__":
    # Manual check when run as a script: scrape one fixed listing URL and
    # print the resulting dictionary.
    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
    print(listing)
|
||||
|
||||
Reference in New Issue
Block a user