70 lines
2.1 KiB
Python
70 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
|
Date of creation: 2022/05/31
|
|
Date of last modification: 2024/08/18
|
|
"""
|
|
|
|
from bs4 import BeautifulSoup
|
|
import datetime
|
|
from tinydb import TinyDB, Query
|
|
import urllib3
|
|
import sys
|
|
import helper
|
|
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
|
|
def make_soup(url: str, timeout: float = 10.0) -> "BeautifulSoup":
    """Download *url* and return its body parsed with the ``lxml`` parser.

    The request is sent with a ``user-agent`` header obtained from
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before urllib3 gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A ``BeautifulSoup`` document built from the raw response body.
    """
    headers = {"user-agent": helper.get_random_user_agent()}

    # Using the pool manager as a context manager closes its pooled
    # connections as soon as the page has been parsed.
    with urllib3.PoolManager(num_pools=10, headers=headers) as http:
        response = http.request("GET", url, timeout=timeout)
        return BeautifulSoup(response.data, "lxml")
|
|
|
|
|
|
def _parse_price(price_text: str) -> float:
    """Convert a price label such as ``"EUR 1.234,56"`` into a float.

    The ``EUR`` marker and surrounding whitespace are removed. When the text
    contains a comma, the last comma is taken as the decimal separator and
    every dot in front of it is dropped as a thousands separator. Text
    without a comma is handed to ``float`` unchanged.

    Raises:
        ValueError: If the cleaned text is not a valid number.
    """
    cleaned = price_text.replace("EUR", "").strip()
    whole, comma, fraction = cleaned.rpartition(",")
    if comma:
        # "1.234,56" -> "1234.56"; a plain "12,50" simply becomes "12.50".
        cleaned = whole.replace(".", "") + "." + fraction
    return float(cleaned)


def scrape_listing(url: str) -> dict:
    """Fetch the listing page at *url* and extract its key fields.

    Args:
        url: Address of the listing page; also stored in the result.

    Returns:
        A dict with the keys:

        * ``"title"``: text of the span inside the ``vim x-item-title`` div.
        * ``"directbuy"``: ``True`` when the text of the first
          ``ux-call-to-action__text`` span equals ``"Sofort-Kaufen"``.
        * ``"price"``: the primary price as a float.
        * ``"category"``: list with the text of every span inside the
          breadcrumb navigation.
        * ``"image"``: ``src`` of the first ``img`` with ``loading="eager"``.
        * ``"url"``: the *url* argument unchanged.

    Raises:
        AttributeError, TypeError: If one of the expected page elements is
            missing, because the lookup then yields ``None``.
        ValueError: If the price text cannot be converted to a number.
    """
    soup = make_soup(url)

    breadcrumbs = soup.find("nav", class_="breadcrumbs breadcrumb--overflow")
    category = [span.text for span in breadcrumbs.find_all("span")]

    # Fields are evaluated in the same order as before, so the first error
    # raised for a page with missing elements does not change.
    return {
        "title": soup.find("div", class_="vim x-item-title").span.text,
        "directbuy": (
            soup.find("span", class_="ux-call-to-action__text").text
            == "Sofort-Kaufen"
        ),
        "price": _parse_price(
            soup.find("div", class_="x-price-primary")
            .find("span", class_="ux-textspans")
            .text
        ),
        "category": category,
        "image": soup.find("img", loading="eager")["src"],
        "url": url,
    }
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: scrape one fixed listing and print the result.
    print(scrape_listing("https://www.ebay.de/itm/226288543773"))
|