70 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			70 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| # -*- coding: utf-8 -*-
 | |
| """ Author:                     Hendrik Schutter, mail@hendrikschutter.com
 | |
|     Date of creation:           2022/05/31
 | |
|     Date of last modification:  2024/08/18
 | |
| """
 | |
| 
 | |
| from bs4 import BeautifulSoup
 | |
| import datetime
 | |
| from tinydb import TinyDB, Query
 | |
| import urllib3
 | |
| import sys
 | |
| import helper
 | |
| 
 | |
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | |
| 
 | |
| 
 | |
def make_soup(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    The request is sent with a ``user-agent`` header whose value comes from
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to fetch with an HTTP GET request.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the ``lxml`` parser.

    Note:
        The HTTP status code is not inspected; whatever body the server
        sends back is parsed.
    """
    headers = {"user-agent": helper.get_random_user_agent()}
    # The pool manager is only needed for this single request. Using it as a
    # context manager clears its connection pools on exit instead of leaving
    # the sockets open until garbage collection.
    with urllib3.PoolManager(10, headers=headers) as http:
        # The body is preloaded by default, so it is fully read here, before
        # the pools are cleared.
        body = http.request("GET", url).data
    return BeautifulSoup(body, "lxml")
 | |
| 
 | |
| 
 | |
def _parse_price(text):
    """Convert a displayed price such as ``"EUR 1.234,56"`` to a float.

    The ``EUR`` marker and surrounding whitespace are removed. When both
    ``.`` and ``,`` occur, the one appearing last is taken as the decimal
    separator and the other is dropped as a grouping separator. A lone
    ``,`` is treated as the decimal separator.

    Raises:
        ValueError: If the remaining text is not a parsable number.
    """
    number = text.replace("EUR", "").strip()
    last_comma = number.rfind(",")
    last_dot = number.rfind(".")
    if last_comma != -1 and last_dot != -1:
        # Both separators present: strip the grouping one, otherwise
        # "1.234,56" would turn into the unparsable "1.234.56".
        grouping = "." if last_comma > last_dot else ","
        number = number.replace(grouping, "")
    return float(number.replace(",", "."))


def scrape_listing(url):
    """Scrape a single listing page and return its key data.

    Args:
        url: Address of the listing page; it is fetched via ``make_soup``.

    Returns:
        A dict with the keys:
            ``title``     - text of the span inside the item title div,
            ``directbuy`` - ``True`` when the call-to-action text is exactly
                            ``"Sofort-Kaufen"``,
            ``price``     - primary price as a float,
            ``category``  - list of the texts of all breadcrumb spans,
            ``image``     - ``src`` of the first eagerly loaded image,
            ``url``       - the *url* that was passed in.

    Raises:
        AttributeError, TypeError: If an expected element is missing from
            the page (the lookup then yields ``None``).
        ValueError: If the price text cannot be parsed as a number.
    """
    soup = make_soup(url)

    breadcrumbs = soup.find("nav", class_="breadcrumbs breadcrumb--overflow")
    category = [span.text for span in breadcrumbs.find_all("span")]

    call_to_action = soup.find("span", class_="ux-call-to-action__text").text
    price_text = (
        soup.find("div", class_="x-price-primary")
        .find("span", class_="ux-textspans")
        .text
    )

    return {
        "title": soup.find("div", class_="vim x-item-title").span.text,
        "directbuy": call_to_action == "Sofort-Kaufen",
        "price": _parse_price(price_text),
        "category": category,
        "image": soup.find("img", loading="eager")["src"],
        "url": url,
    }
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Manual smoke test: scrape one fixed listing and print the resulting dict.
    print(scrape_listing("https://www.ebay.de/itm/226288543773"))
 |