#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Scrape the details of a single eBay listing page.

Author:                     Hendrik Schutter, mail@hendrikschutter.com
Date of creation:           2022/05/31
Date of last modification:  2024/08/18
"""

import datetime
import sys

from bs4 import BeautifulSoup
from tinydb import TinyDB, Query
import urllib3

import helper

# Silence urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url: str) -> BeautifulSoup:
    """Fetch *url* with a random user agent and return the parsed document.

    Args:
        url: Address of the page to download.

    Returns:
        The response body parsed by BeautifulSoup with the ``lxml`` parser.
    """
    user_agent = {"user-agent": helper.get_random_user_agent()}
    http = urllib3.PoolManager(num_pools=10, headers=user_agent)
    response = http.request("GET", url)
    return BeautifulSoup(response.data, "lxml")


def _parse_price(price_text: str) -> float:
    """Convert a price label such as ``"EUR 1.234,56"`` into a float.

    Dots in front of the decimal comma are treated as thousands separators
    and removed; without that step ``"1.234,56"`` would turn into
    ``"1.234.56"`` and ``float()`` would fail. Text without a comma is
    passed to ``float()`` unchanged.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    cleaned = price_text.replace("EUR", "").strip()
    integer_part, comma, fraction = cleaned.rpartition(",")
    if comma:
        cleaned = f"{integer_part.replace('.', '')}.{fraction}"
    return float(cleaned)


def scrape_listing(url: str) -> dict:
    """Download a listing page and extract its key attributes.

    Args:
        url: Address of the listing page.

    Returns:
        A dict with the keys ``title``, ``directbuy``, ``price``,
        ``category``, ``image`` and ``url``.

    Raises:
        AttributeError, TypeError: If an expected element is missing from
            the page (``soup.find`` returned ``None``).
        ValueError: If the price text cannot be parsed as a number.
    """
    soup = make_soup(url)

    # Every <span> inside the breadcrumb navigation is one category level.
    breadcrumbs = soup.find("nav", class_="breadcrumbs breadcrumb--overflow")
    category = [span.text for span in breadcrumbs.find_all("span")]

    price_text = (
        soup.find("div", class_="x-price-primary")
        .find("span", class_="ux-textspans")
        .text
    )
    call_to_action = soup.find("span", class_="ux-call-to-action__text").text

    return {
        "title": soup.find("div", class_="vim x-item-title").span.text,
        # True when the first call-to-action label is the buy-now label.
        "directbuy": call_to_action == "Sofort-Kaufen",
        "price": _parse_price(price_text),
        "category": category,
        "image": soup.find("img", loading="eager")["src"],
        "url": url,
    }


if __name__ == "__main__":
    listing = scrape_listing("https://www.ebay.de/itm/226288543773")
    print(listing)