46 lines
1.3 KiB
Python
46 lines
1.3 KiB
Python
|
|
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2022/05/31
"""
|
|
|
|
from bs4 import BeautifulSoup
|
|
import datetime
|
|
from tinydb import TinyDB, Query
|
|
import urllib3
|
|
import sys
|
|
import helper
|
|
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
def make_soup(url, timeout=30.0):
    """Download ``url`` and return its body parsed into a BeautifulSoup tree.

    The page is fetched with a plain GET request that carries a
    ``user-agent`` header obtained from ``helper.get_random_user_agent()``.
    The HTTP status code is not inspected: whatever body the server returns
    is handed to the parser.

    Args:
        url: Address of the page to download.
        timeout: Seconds urllib3 waits for the connection and for each read
            before giving up. Without a limit a stalled server would block
            the caller indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``lxml`` parser.
    """
    headers = {'user-agent': helper.get_random_user_agent()}
    http = urllib3.PoolManager(num_pools=10, headers=headers)
    try:
        # urllib3 preloads the body by default, so ``response.data`` is
        # complete before the pool is torn down in the ``finally`` below.
        response = http.request("GET", url, timeout=timeout)
        return BeautifulSoup(response.data, 'lxml')
    finally:
        # A fresh pool is created on every call; release its sockets now
        # instead of leaving them open until garbage collection.
        http.clear()
|
|
|
|
def scrape_listing(url):
    """Fetch a listing page and collect its key details.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A dict with four keys:
        ``title`` - text of the span inside the ``vim x-item-title`` div,
        ``price`` - ``content`` attribute of the ``itemprop="price"`` span,
        converted to ``float``,
        ``image`` - ``src`` attribute of the ``itemprop="image"`` img,
        ``url``   - the ``url`` argument, unchanged.
    """
    page = make_soup(url)

    # Extract the fields one after another, in the order of the result keys.
    title = page.find("div", class_="vim x-item-title").span.text
    price = float(page.find("span", itemprop="price")["content"])
    image = page.find("img", itemprop="image")["src"]

    return {'title': title, 'price': price, 'image': image, 'url': url}
|
|
|
|
if __name__ == "__main__":
    # When run as a script: scrape one hard-coded listing and print the
    # resulting dict.
    demo_url = "https://www.ebay.de/itm/162861653490"
    print(scrape_listing(demo_url))
|
|
|