eBayCompetitorPriceCompare/scrape_listing.py

70 lines
2.1 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/05/31
Date of last modification: 2024/08/18
"""
from bs4 import BeautifulSoup
import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
import helper
# Silence urllib3's InsecureRequestWarning category so these warnings are not
# emitted while scraping.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def make_soup(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Each call builds a fresh urllib3 pool manager whose default headers carry
    a ``user-agent`` value obtained from ``helper.get_random_user_agent()``.
    The page is fetched with a single GET request and the raw response body
    is handed to BeautifulSoup using the "lxml" parser.

    Args:
        url: Address of the page to fetch.

    Returns:
        The BeautifulSoup object built from the response body.
    """
    request_headers = {"user-agent": helper.get_random_user_agent()}
    pool = urllib3.PoolManager(num_pools=10, headers=request_headers)
    response = pool.request("GET", url)
    return BeautifulSoup(response.data, "lxml")
def _parse_price(price_text):
    """Convert a price label such as ``"EUR 1.234,56"`` into a float.

    The "EUR" marker and surrounding whitespace are removed. When the
    remaining text contains a decimal comma, any "." is treated as a
    thousands separator and dropped before the comma is turned into a
    decimal point. Text without a comma is passed to ``float`` unchanged.

    Raises:
        ValueError: If the cleaned text is not a valid number.
    """
    cleaned = price_text.replace("EUR", "").strip()
    if "," in cleaned:
        # Without removing the grouping dots first, "1.234,56" would become
        # the unparsable "1.234.56".
        cleaned = cleaned.replace(".", "").replace(",", ".")
    return float(cleaned)


def scrape_listing(url):
    """Scrape a single listing page and return its key data as a dict.

    The page is fetched through ``make_soup`` and the values are read from
    fixed CSS classes in the returned document.

    Args:
        url: Address of the listing page.

    Returns:
        A dict with the keys:
            "title": text of the span inside the item-title div.
            "directbuy": True when the call-to-action label reads exactly
                "Sofort-Kaufen", otherwise False.
            "price": primary price as a float (see ``_parse_price``).
            "category": list with the text of every span in the breadcrumb
                navigation, in document order.
            "image": ``src`` attribute of the first eagerly loaded image.
            "url": the *url* argument, unchanged.

    Raises:
        AttributeError, TypeError: If one of the expected elements is not
            present in the page (the lookup yields ``None``).
        ValueError: If the price text cannot be converted to a number.
    """
    soup = make_soup(url)

    # Lookups are kept in the same order as before so that a page missing
    # several elements fails on the same element first.
    breadcrumb_nav = soup.find("nav", class_="breadcrumbs breadcrumb--overflow")
    category = [span.text for span in breadcrumb_nav.find_all("span")]

    title = soup.find("div", class_="vim x-item-title").span.text
    action_label = soup.find("span", class_="ux-call-to-action__text").text
    price_label = (
        soup.find("div", class_="x-price-primary")
        .find("span", class_="ux-textspans")
        .text
    )
    price = _parse_price(price_label)
    image = soup.find("img", loading="eager")["src"]

    return {
        "title": title,
        "directbuy": action_label == "Sofort-Kaufen",
        "price": price,
        "category": category,
        "image": image,
        "url": url,
    }
if __name__ == "__main__":
    # Manual check: scrape one fixed listing and print the resulting dict.
    print(scrape_listing("https://www.ebay.de/itm/226288543773"))