#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author:                     Hendrik Schutter, mail@hendrikschutter.com
    Date of creation:           2022/10/25
    Date of last modification:  2022/10/25
"""

import datetime
import re
import sys

from bs4 import BeautifulSoup
import unidecode
import urllib3

import helper

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

_STATION_DETAILS_URL = "https://www.clever-tanken.de/tankstelle_details/"

# itemprop values of the <span> elements that make up the station identity,
# in the order they are joined into the metric base name.
_STATION_ITEMPROPS = (
    'name',
    'streetAddress',
    'http://schema.org/postalCode',
    'http://schema.org/addressCountry',
)

# Characters removed from station and fuel names before transliteration.
_STRIP_CHARS = str.maketrans("", "", " .,-")

# BeautifulSoup filters attribute values with the pattern's search() method,
# so this matches every id that contains "current-price" (the trailing "-*"
# only means "zero or more hyphens").
_PRICE_ID_PATTERN = re.compile(r'current-price-*')


def _sanitize(text):
    """Strip spaces, dots, commas and hyphens from *text*, then transliterate it to ASCII."""
    return unidecode.unidecode(text.translate(_STRIP_CHARS))


def _itemprop_text(soup, itemprop):
    """Return the text of the first <span> carrying the given itemprop.

    Raises:
        AttributeError: if the page contains no such element.
    """
    tag = soup.find("span", {"itemprop": itemprop})
    if tag is None:
        # Same exception type the bare `.text` access on None would raise,
        # but with a message that names the missing field.
        raise AttributeError("no <span itemprop=" + repr(itemprop) + "> found in page")
    return tag.text


def make_soup(url):
    """Fetch *url* via HTTP GET and return the response parsed with lxml.

    The request is sent with the user agent returned by
    helper.get_random_user_agent().

    Args:
        url: address of the page to download.

    Returns:
        BeautifulSoup document built from the response body.

    Raises:
        FileNotFoundError: if the response status is not 200.
    """
    user_agent = {'user-agent': helper.get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    if r.status != 200:
        raise FileNotFoundError("http error code " + str(r.status) + " for " + url)
    return BeautifulSoup(r.data, 'lxml')


def scrape_station(station_id):
    """Scrape the clever-tanken.de details page of one station.

    Args:
        station_id: station identifier appended to the details URL.

    Returns:
        dict with the keys
            'id'                      -- the station_id that was passed in,
            'station_metric_basename' -- name, street address, postal code and
                                         country joined with "_" and sanitized,
            'fuels'                   -- list of {'name': str, 'price': float}.

    Raises:
        FileNotFoundError: if the page could not be fetched (see make_soup).
        AttributeError: if one of the station identity fields is missing.
    """
    soup = make_soup(_STATION_DETAILS_URL + str(station_id))

    station_metric_basename = _sanitize(
        "_".join(_itemprop_text(soup, itemprop) for itemprop in _STATION_ITEMPROPS)
    )

    fuels = []
    for row in soup.find_all("div", class_="price-row row d-flex align-items-center"):
        try:
            fuel_name = _sanitize(row.div.div.text)
            price_text = row.find("span", {"id": _PRICE_ID_PATTERN}).text
            # NOTE(review): the added 0.009 presumably restores the trailing
            # 9/10 cent that the page shows outside the price span -- confirm.
            fuel_price = round(float(price_text) + 0.009, 3)
        except (AttributeError, KeyError, ValueError):
            # Best effort: a row without the expected markup or without a
            # numeric price is skipped instead of aborting the whole station.
            continue
        fuels.append({'name': fuel_name, 'price': fuel_price})

    return {
        'id': station_id,
        'station_metric_basename': station_metric_basename,
        'fuels': fuels,
    }


if __name__ == "__main__":
    station = scrape_station(21907)
    print(station)