#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Scrape the title and price of a single listing web page.

    Author: Hendrik Schutter, mail@hendrikschutter.com
    Date of creation: 2022/05/31
    Date of last modification: 2022/05/31
"""

import datetime
import sys

from bs4 import BeautifulSoup
from tinydb import TinyDB, Query
import urllib3

import helper

# Silence urllib3's InsecureRequestWarning for the whole process.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url: str) -> BeautifulSoup:
    """Fetch *url* with an HTTP GET and parse the response body.

    The request carries a ``user-agent`` header whose value is taken from
    ``helper.get_random_user_agent()``. The HTTP status code is not
    inspected; whatever body comes back is handed to the parser.

    Args:
        url: Address of the page to download.

    Returns:
        The response body parsed by BeautifulSoup using the ``lxml`` parser.
    """
    user_agent = {'user-agent': helper.get_random_user_agent()}
    http = urllib3.PoolManager(num_pools=10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, 'lxml')


def scrape_listing(url: str) -> dict:
    """Download a listing page and extract its title and price.

    Args:
        url: Address of the listing page.

    Returns:
        A dict with the keys ``'title'`` (text of the ``<span>`` inside the
        ``div`` with class ``vim x-item-title``) and ``'price'`` (the
        ``content`` attribute of the ``span`` with id ``prcIsum``, converted
        to ``float``).

    Raises:
        AttributeError: The title element is not present on the page.
        TypeError: The price element is not present on the page.
        KeyError: The price element has no ``content`` attribute.
        ValueError: The ``content`` attribute is not a valid float.
    """
    print("Web Page: ", url)

    soup = make_soup(url)

    # find() returns None when nothing matches. Fail with a message that
    # names the missing element, but keep the exception types that the
    # unchecked lookups used to raise so existing handlers still apply.
    title_div = soup.find("div", class_="vim x-item-title")
    if title_div is None or title_div.span is None:
        raise AttributeError(
            "listing title element not found on page: {}".format(url)
        )

    price_span = soup.find("span", id="prcIsum")
    if price_span is None:
        raise TypeError(
            "listing price element not found on page: {}".format(url)
        )

    listing = {
        'title': title_div.span.text,
        'price': float(price_span["content"]),
    }

    return listing


if __name__ == "__main__":
    # An optional first command-line argument overrides the example URL.
    if len(sys.argv) > 1:
        target_url = sys.argv[1]
    else:
        target_url = "https://www.ebay.de/itm/165445016341"
    listing = scrape_listing(target_url)
    print(listing)