76 lines
2.6 KiB
Python
76 lines
2.6 KiB
Python
|
|
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
|
Date of creation: 2022/10/25
|
|
Date of last modification: 2022/10/25
|
|
"""
|
|
|
|
from bs4 import BeautifulSoup
|
|
import datetime
|
|
import urllib3
|
|
import sys
|
|
import re
|
|
import unidecode
|
|
import helper
|
|
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
def make_soup(url):
    """Download *url* with a GET request and parse the body as HTML.

    The request is sent with a ``user-agent`` header taken from
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` tree built from the response body using the
        ``lxml`` parser.

    Raises:
        FileNotFoundError: If the response status is anything other than 200.
    """
    headers = {'user-agent': helper.get_random_user_agent()}

    # A fresh manager is built on every call, so release its pooled
    # connections on exit instead of leaving the sockets to the garbage
    # collector.
    with urllib3.PoolManager(num_pools=10, headers=headers) as http:
        response = http.request("GET", url)

        if response.status != 200:
            raise FileNotFoundError(
                "http error code " + str(response.status) + " for " + url
            )

        # The body is read and parsed while the pool is still open.
        return BeautifulSoup(response.data, 'lxml')
|
|
|
|
# Characters removed from scraped text before it is used in a metric name.
_METRIC_STRIP_TABLE = str.maketrans("", "", " .,-")

# Used as an ``id`` filter for the price <span>. NOTE: this is a regex, not a
# glob: the trailing ``-*`` means "zero or more hyphens", so any id containing
# "current-price" is accepted (BeautifulSoup applies regex filters via search).
_PRICE_ID_PATTERN = re.compile('current-price-*')


def _to_metric_token(text):
    """Strip spaces, dots, commas and hyphens from *text*, then transliterate it with unidecode."""
    return unidecode.unidecode(text.translate(_METRIC_STRIP_TABLE))


def scrape_station(station_id):
    """Scrape name, address and fuel prices of one clever-tanken.de station.

    Args:
        station_id: Identifier appended to the ``tankstelle_details`` URL.

    Returns:
        A dict with three keys:
            ``id``: the ``station_id`` that was passed in.
            ``station_metric_basename``: name, street, postal code and
                country joined with ``_`` and sanitised for use in a
                metric name.
            ``fuels``: list of ``{'name': str, 'price': float}`` dicts, one
                per price row that could be parsed.

    Raises:
        FileNotFoundError: Propagated from ``make_soup`` on a non-200 reply.
        AttributeError: If one of the name/address ``<span>`` elements is
            missing from the page.
    """
    url = "https://www.clever-tanken.de/tankstelle_details/" + str(station_id)
    soup = make_soup(url)

    # Name and address parts are looked up by their ``itemprop`` attribute.
    itemprops = (
        'name',
        'streetAddress',
        'http://schema.org/postalCode',
        'http://schema.org/addressCountry',
    )
    address_parts = [
        soup.find("span", {"itemprop": itemprop}).text
        for itemprop in itemprops
    ]
    station_metric_basename = _to_metric_token("_".join(address_parts))

    fuels = []
    price_rows = soup.find_all(
        "div", class_="price-row row d-flex align-items-center"
    )
    for row in price_rows:
        try:
            fuel_name = _to_metric_token(row.div.div.text)
            price_text = row.find("span", {"id": _PRICE_ID_PATTERN}).text
            # NOTE(review): the 0.009 offset presumably restores the fixed
            # 9/10 cent that the page shows outside this span - confirm.
            fuel_price = round(float(price_text) + 0.009, 3)
        except (AttributeError, KeyError, ValueError):
            # Best effort: skip rows without the expected markup or whose
            # price text is not a number, rather than failing the whole
            # station.
            continue

        fuels.append({'name': fuel_name, 'price': fuel_price})

    return {
        'id': station_id,
        'station_metric_basename': station_metric_basename,
        'fuels': fuels,
    }
|
|
|
|
if __name__ == "__main__":
    # Manual run: scrape one hard-coded station and print the resulting dict.
    print(scrape_station(21907))
|
|
|