gas-station-exporter/station_scraper.py

76 lines
2.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2022/10/25
Date of last modification: 2022/10/25
"""
from bs4 import BeautifulSoup
import datetime
import urllib3
import sys
import re
import unidecode
import helper
# Silence urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): make_soup() below does not visibly turn off certificate
# verification, so this suppression may be unnecessary -- confirm before keeping.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def make_soup(url):
    """Fetch *url* with an HTTP GET and parse the response body with lxml.

    The request carries a ``user-agent`` header whose value is supplied by
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` tree built from the raw response data.

    Raises:
        FileNotFoundError: If the response status is anything other than 200.
    """
    request_headers = {'user-agent': helper.get_random_user_agent()}
    pool = urllib3.PoolManager(num_pools=10, headers=request_headers)
    response = pool.request("GET", url)

    # Success path first; every non-200 status falls through to the error.
    if response.status == 200:
        return BeautifulSoup(response.data, 'lxml')

    raise FileNotFoundError("http error code " + str(response.status) + " for " + url)
# Characters removed from scraped text before it becomes part of a metric name.
_METRIC_STRIP_TABLE = str.maketrans("", "", " .,-")

# Prefix match on the span id. A glob-style 'current-price-*' would be read by
# the regex engine as "zero or more hyphens" and match any id that merely
# contains "current-price".
# NOTE(review): assumes the price spans use ids like "current-price-<n>" --
# confirm against the live page markup.
_CURRENT_PRICE_ID = re.compile(r"^current-price-")

# Added to every scraped price before rounding to three decimals.
# NOTE(review): presumably the page shows the trailing 9/10 cent outside the
# parsed span -- confirm.
_PRICE_SUFFIX = 0.009

# itemprop values of the spans that make up the station's metric base name,
# in the order they are joined.
_STATION_ITEMPROPS = (
    'name',
    'streetAddress',
    'http://schema.org/postalCode',
    'http://schema.org/addressCountry',
)


def _to_metric_token(text):
    """Strip spaces, dots, commas and hyphens from *text*, then transliterate to ASCII."""
    return unidecode.unidecode(text.translate(_METRIC_STRIP_TABLE))


def scrape_station(station_id):
    """Scrape the name, address and current fuel prices of one station.

    Args:
        station_id: Identifier appended to the clever-tanken.de
            ``tankstelle_details`` URL.

    Returns:
        A dict with the keys ``'id'`` (the given *station_id*),
        ``'station_metric_basename'`` (name, street, postal code and country
        joined by ``_`` and sanitised) and ``'fuels'`` (a list of
        ``{'name': str, 'price': float}`` dicts, one per price row that could
        be parsed).

    Raises:
        FileNotFoundError: Propagated from ``make_soup`` when the page does
            not answer with status 200.
        AttributeError: If one of the name/address spans is missing from the
            page (``soup.find`` returns ``None``).
    """
    url = "https://www.clever-tanken.de/tankstelle_details/" + str(station_id)
    soup = make_soup(url)

    station_metric_basename = _to_metric_token(
        "_".join(
            soup.find("span", {"itemprop": itemprop}).text
            for itemprop in _STATION_ITEMPROPS
        )
    )

    fuels = []
    for row in soup.find_all("div", class_="price-row row d-flex align-items-center"):
        try:
            fuel_name = _to_metric_token(row.div.div.text)
            price_text = row.find("span", {"id": _CURRENT_PRICE_ID}).text
            fuel_price = round(float(price_text) + _PRICE_SUFFIX, 3)
        except (AttributeError, KeyError, ValueError):
            # Best effort: a row without the expected markup (AttributeError)
            # or with a non-numeric price text (ValueError) is skipped so one
            # bad row does not abort the whole station.
            continue
        fuels.append({'name': fuel_name, 'price': fuel_price})

    return {
        'id': station_id,
        'station_metric_basename': station_metric_basename,
        'fuels': fuels,
    }
if __name__ == "__main__":
    # Manual smoke run: scrape one hard-coded station id and dump the result.
    print(scrape_station(21907))