parse needed data from listing page
This commit is contained in:
parent
1e6cb13e79
commit
b9d8774916
|
@ -0,0 +1,43 @@
|
|||
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||
Date of creation: 2022/05/31
|
||||
Date of last modification: 2022/05/31
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import datetime
|
||||
from tinydb import TinyDB, Query
|
||||
import urllib3
|
||||
import sys
|
||||
import helper
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
def make_soup(url):
    """Fetch *url* with an HTTP GET and return the body parsed by BeautifulSoup.

    The request is sent with a ``user-agent`` header taken from
    ``helper.get_random_user_agent()`` and the response body is parsed with
    the ``lxml`` parser.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built from the response body.
    """
    headers = {'user-agent': helper.get_random_user_agent()}

    # A pool manager is created for every call; using it as a context manager
    # releases its pooled connections as soon as the body has been read
    # instead of leaving them open until garbage collection.
    with urllib3.PoolManager(10, headers=headers) as http:
        response = http.request("GET", url)
        return BeautifulSoup(response.data, 'lxml')
|
||||
|
||||
def scrape_listing(url):
    """Download a listing page and extract its title and price.

    Args:
        url: Address of the listing page.

    Returns:
        A dict with two entries:
        ``'title'`` - text of the ``span`` inside the ``div`` whose class is
        ``vim x-item-title``;
        ``'price'`` - the ``content`` attribute of the ``span`` with id
        ``prcIsum``, converted to ``float``.

    NOTE(review): there is no handling for pages that lack these elements;
    presumably ``find`` yields ``None`` and the lookups below then fail -
    confirm against the callers' expectations.
    """
    print("Web Page: ", url)

    page = make_soup(url)

    # Locate both elements first, then pull the values out of them.
    title_block = page.find("div", class_="vim x-item-title")
    price_tag = page.find("span", id="prcIsum")

    title = title_block.span.text
    price = float(price_tag["content"])

    return {'title': title, 'price': price}
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: scrape one hard-coded listing and print the result.
    example_url = "https://www.ebay.de/itm/165445016341"
    print(scrape_listing(example_url))
|
||||
|
Loading…
Reference in New Issue