parse needed data from listing page
This commit is contained in:
parent
1e6cb13e79
commit
b9d8774916
43
scrape_listing.py
Normal file
43
scrape_listing.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||||
|
Date of creation: 2022/05/31
|
||||||
|
Date of last modification: 2022/05/31
|
||||||
|
"""
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import datetime
|
||||||
|
from tinydb import TinyDB, Query
|
||||||
|
import urllib3
|
||||||
|
import sys
|
||||||
|
import helper
|
||||||
|
|
||||||
|
# NOTE(review): globally silences urllib3's InsecureRequestWarning —
# presumably some fetches hit hosts with unverified TLS certs; confirm
# whether certificate verification is intentionally skipped elsewhere.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
def make_soup(url):
    """Fetch *url* over HTTP GET and parse the body into a BeautifulSoup tree.

    A randomized User-Agent header (from the project-local ``helper`` module)
    is sent with the request to reduce the chance of being blocked.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        BeautifulSoup: the response body parsed with the ``lxml`` parser.
    """
    user_agent = {'user-agent': helper.get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    # Bounded timeout so a stalled or unreachable host cannot hang the
    # scraper indefinitely (the original request had no timeout at all).
    r = http.request(
        "GET", url, timeout=urllib3.Timeout(connect=10.0, read=30.0)
    )
    return BeautifulSoup(r.data, 'lxml')
|
||||||
|
|
||||||
|
def scrape_listing(url):
    """Scrape the title and price from a single eBay listing page.

    Args:
        url: URL of the eBay listing page.

    Returns:
        dict: ``{'title': str, 'price': float}`` extracted from the page.

    Raises:
        ValueError: if the expected title or price elements are missing,
            e.g. because the page layout changed or the listing ended.
    """
    print ("Web Page: ", url)

    soup = make_soup(url)

    # Look the elements up once and validate them explicitly: a missing
    # element previously crashed with an opaque AttributeError on None.
    title_div = soup.find("div", class_="vim x-item-title")
    price_span = soup.find("span", id="prcIsum")

    if title_div is None or title_div.span is None:
        raise ValueError("listing title element not found on page: " + url)
    if price_span is None or not price_span.has_attr("content"):
        raise ValueError("listing price element not found on page: " + url)

    listing = {
        'title': title_div.span.text,
        'price': float(price_span["content"])
    }

    return listing
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Example run: scrape one known eBay listing and show what was parsed.
    result = scrape_listing("https://www.ebay.de/itm/165445016341")
    print(result)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user