#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/10/25
Date of last modification: 2022/10/25
"""
from bs4 import BeautifulSoup
import datetime
import urllib3
import sys
import re
import unidecode
import helper
urllib3 . disable_warnings ( urllib3 . exceptions . InsecureRequestWarning )
def make_soup(url):
    """Download *url* and parse the response body into a BeautifulSoup tree.

    The request is sent with a ``user-agent`` header whose value comes from
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to fetch.

    Returns:
        BeautifulSoup: The response body parsed with the ``lxml`` parser.

    Raises:
        FileNotFoundError: If the server answers with any status other
            than 200.
    """
    user_agent = {"user-agent": helper.get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    response = http.request("GET", url)

    # Anything but a plain 200 is treated as "page not available".
    if response.status != 200:
        raise FileNotFoundError(
            f"http error code {response.status} for {url}"
        )

    return BeautifulSoup(response.data, "lxml")
# Deletion table for characters that must not appear in a metric name part.
_METRIC_NAME_STRIP_TABLE = str.maketrans("", "", " .,-")


def _to_metric_name(text):
    """Drop spaces, dots, commas and hyphens from *text*, then transliterate to ASCII."""
    return unidecode.unidecode(text.translate(_METRIC_NAME_STRIP_TABLE))


def scrape_station(station_id):
    """Scrape the name, address and current fuel prices of one station.

    The station's detail page on clever-tanken.de is fetched through
    ``make_soup`` and read from the parsed HTML.

    Args:
        station_id: Station identifier; it is appended to the
            ``tankstelle_details`` URL.

    Returns:
        dict: ``{"id": station_id, "station_metric_basename": str,
        "fuels": [{"name": str, "price": float}, ...]}``. ``fuels`` is
        empty when no price row could be parsed.

    Raises:
        FileNotFoundError: Propagated from ``make_soup`` when the page does
            not answer with HTTP 200.
        AttributeError: If one of the name/address ``<span>`` elements is
            missing from the page.
    """
    url = "https://www.clever-tanken.de/tankstelle_details/" + str(station_id)
    soup = make_soup(url)

    # Station name, street, postal code and country, joined by underscores.
    address_props = (
        "name",
        "streetAddress",
        "http://schema.org/postalCode",
        "http://schema.org/addressCountry",
    )
    station_metric_basename = _to_metric_name(
        "_".join(
            soup.find("span", {"itemprop": prop}).text for prop in address_props
        )
    )

    # NOTE: as a regex, "-*" means "zero or more hyphens", so this matches
    # every id that contains "current-price". Compiled once, outside the loop.
    price_id_pattern = re.compile(r"current-price-*")

    fuels = []
    price_rows = soup.find_all(
        "div", class_="price-row row d-flex align-items-center"
    )
    for row in price_rows:
        try:
            fuel_name = _to_metric_name(row.div.div.text)
            price_text = row.find("span", {"id": price_id_pattern}).text
            # NOTE(review): presumably the page omits the trailing 0.9 ct
            # that pump prices carry, hence the +0.009 -- confirm.
            fuel_price = round(float(price_text) + 0.009, 3)
        except (AttributeError, KeyError, ValueError):
            # Best effort per row: a row without the expected markup or with
            # a non-numeric price is skipped instead of aborting the scrape.
            continue
        fuels.append({"name": fuel_name, "price": fuel_price})

    return {
        "id": station_id,
        "station_metric_basename": station_metric_basename,
        "fuels": fuels,
    }
if __name__ == "__main__":
    # Manual smoke test: scrape one hard-coded station and print the result.
    station = scrape_station(21907)
    print(station)