diff --git a/gas-station-exporter.py b/gas-station-exporter.py
new file mode 100644
--- /dev/null
+++ b/gas-station-exporter.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Author: Hendrik Schutter, mail@hendrikschutter.com
+    Date of creation: 2022/10/23
+    Date of last modification: 2022/10/23
+
+    pip install lxml beautifulsoup4 urllib3 unidecode
+
+    Prometheus exporter: scrapes fuel prices in a background thread and
+    serves them as plain-text metrics on /metrics.
+"""
+
+from http.server import BaseHTTPRequestHandler, HTTPServer
+import time
+import threading
+from datetime import datetime
+from urllib.parse import urlsplit, parse_qs
+from random import randrange
+import station_scraper
+
+hostName = "10.10.3.1"
+serverPort = 29816
+exporter_prefix = "gas_"
+scrape_interval_seconds = 300
+
+stations_ids = (20153, 21907, 183433, 159416, 30856, 16362, 12634)
+
+request_count = 0
+startTime = datetime.now()
+station_metrics = list()
+mutex = threading.Lock()
+# Set on shutdown so the scrape loop ends and main() can join the thread.
+stop_event = threading.Event()
+
+# Content type of the Prometheus text exposition format.
+METRICS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8"
+
+INDEX_PAGE = (
+    "<html>"
+    "<head><title>gas station exporter</title></head>"
+    "<body>"
+    "<p>gas station exporter based on data from https://www.clever-tanken.de/</p>"
+    '<p><a href="/metrics">Metrics</a></p>'
+    "<p>obtain station id from https://www.clever-tanken.de/tankstelle_details/3569</p>"
+    "</body>"
+    "</html>"
+)
+
+
+class RequestHandler(BaseHTTPRequestHandler):
+    """Serve the scraped prices on /metrics and an info page everywhere else."""
+
+    def send_text(self, body, content_type):
+        """Send a complete 200 response with body encoded as UTF-8."""
+        payload = body.encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-type", content_type)
+        self.send_header("Content-Length", str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def get_metrics(self):
+        """Answer with the exporter's own metrics followed by all station prices."""
+        uptime = int((datetime.now() - startTime).total_seconds())
+        # Copy under the lock and write afterwards: a slow or vanished client
+        # can then never leave the mutex locked or stall the scrape thread.
+        with mutex:
+            metrics = list(station_metrics)
+        lines = [
+            exporter_prefix + "exporter_duration_seconds_sum " + str(uptime),
+            exporter_prefix + "exporter_request_count " + str(request_count),
+        ]
+        lines.extend(exporter_prefix + metric for metric in metrics)
+        self.send_text("\n".join(lines) + "\n", METRICS_CONTENT_TYPE)
+
+    def do_GET(self):
+        """Count the request and dispatch on the requested path."""
+        global request_count
+        request_count = request_count + 1
+        print("Request: " + self.path)
+        if self.path.startswith("/metrics"):
+            self.get_metrics()
+        else:
+            self.send_text(INDEX_PAGE, "text/html; charset=utf-8")
+
+
+def scrape_all_stations():
+    """Scrape every configured station and return the resulting metric lines.
+
+    A station that fails is reported and skipped, so one broken page does
+    not discard the prices of all other stations.
+    """
+    metrics = list()
+    for station_id in stations_ids:
+        try:
+            station_data = station_scraper.scrape_station(station_id)
+        except Exception as ex:
+            print("Scrape of station " + str(station_id) + " failed: " + repr(ex))
+            continue
+        for fuel in station_data['fuels']:
+            metrics.append(station_data['station_metric_basename'] + "_" + fuel['name'] + " " + str(fuel['price']))
+    return metrics
+
+
+def update_metrics():
+    """Refresh station_metrics every scrape_interval_seconds until stop_event is set."""
+    global station_metrics
+    while not stop_event.is_set():
+        print("Scrape")
+        # Scrape without holding the mutex: the downloads can take long and
+        # /metrics must keep answering with the previous prices meanwhile.
+        fresh_metrics = scrape_all_stations()
+        with mutex:
+            station_metrics = fresh_metrics
+        # wait() instead of sleep() so a shutdown is noticed immediately.
+        stop_event.wait(scrape_interval_seconds)
+
+
+def main():
+    """Start the scrape thread and serve HTTP until interrupted."""
+    print("start")
+
+    webServer = HTTPServer((hostName, serverPort), RequestHandler)
+
+    print("Server started http://%s:%s" % (hostName, serverPort))
+
+    update_metrics_thread = threading.Thread(target=update_metrics, args=())
+    update_metrics_thread.start()
+
+    try:
+        webServer.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        # Without this the scrape loop never ends and join() blocks forever.
+        stop_event.set()
+        webServer.server_close()
+
+    print("Server stopped.")
+    update_metrics_thread.join()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/helper.py b/helper.py
new file mode 100644
--- /dev/null
+++ b/helper.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Author: Hendrik Schutter, mail@hendrikschutter.com
+    Date of creation: 2022/05/31
+    Date of last modification: 2022/05/31
+"""
+import random
+
+# User agent strings that get_random_user_agent() chooses from.
+USER_AGENTS = (
+    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
+    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
+    "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
+    "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
+    "Dalvik/2.1.0 (Linux; U; Android 10; M2006C3MI MIUI/V12.0.15.0.QCRINXM)",
+)
+
+
+def get_random_user_agent():
+    """Return one of the USER_AGENTS strings, picked at random."""
+    return random.choice(USER_AGENTS)
diff --git a/station_scraper.py b/station_scraper.py
new file mode 100644
--- /dev/null
+++ b/station_scraper.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+""" Author: Hendrik Schutter, mail@hendrikschutter.com
+    Date of creation: 2022/10/25
+    Date of last modification: 2022/10/25
+"""
+
+from bs4 import BeautifulSoup
+import datetime
+import urllib3
+import sys
+import re
+import unidecode
+import helper
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+STATION_URL = "https://www.clever-tanken.de/tankstelle_details/"
+REQUEST_TIMEOUT_SECONDS = 30.0
+# itemprop values of the spans that form the metric base name, in order.
+STATION_NAME_PARTS = (
+    'name',
+    'streetAddress',
+    'http://schema.org/postalCode',
+    'http://schema.org/addressCountry',
+)
+# Everything that is not allowed in a Prometheus metric name.
+INVALID_METRIC_CHARS = re.compile(r"[^a-zA-Z0-9_]")
+# Used as a search, so it matches the same ids as 'current-price-*' did.
+PRICE_SPAN_ID = re.compile(r"current-price")
+
+
+def make_soup(url):
+    """Download url with a random user agent and return the parsed page.
+
+    Connection problems and timeouts surface as urllib3 exceptions; any
+    HTTP status other than 200 raises RuntimeError.
+    """
+    user_agent = {'user-agent': helper.get_random_user_agent()}
+    http = urllib3.PoolManager(10, headers=user_agent)
+    r = http.request("GET", url, timeout=REQUEST_TIMEOUT_SECONDS)
+    if r.status != 200:
+        raise RuntimeError("GET " + url + " returned HTTP status " + str(r.status))
+    return BeautifulSoup(r.data, 'lxml')
+
+
+def sanitize_metric_part(text):
+    """Transliterate text to ASCII and drop every character a metric name may not contain."""
+    return INVALID_METRIC_CHARS.sub("", unidecode.unidecode(text))
+
+
+def scrape_station(station_id):
+    """Scrape name, address and current fuel prices of one station.
+
+    Returns a dict with the keys 'id', 'station_metric_basename' and
+    'fuels'; 'fuels' is a list of {'name': ..., 'price': ...} dicts.
+    Raises AttributeError when the page lacks one of the name/address spans.
+    """
+    soup = make_soup(STATION_URL + str(station_id))
+
+    name_parts = [soup.find("span", {"itemprop": itemprop}).text for itemprop in STATION_NAME_PARTS]
+    station_metric_basename = sanitize_metric_part("_".join(name_parts))
+
+    results = soup.find_all("div", class_="price-row row d-flex align-items-center")
+    fuels = list()
+    for result in results:
+        try:
+            fuel_name = sanitize_metric_part(result.div.div.text)
+            price_text = result.find("span", {"id": PRICE_SPAN_ID}).text
+            # NOTE(review): +0.009 presumably adds the 9/10 cent the page
+            # shows outside this span - confirm against the live page.
+            fuel_price = round(float(price_text) + 0.009, 3)
+        except (AttributeError, KeyError, ValueError):
+            # Best effort: a row without a name or a numeric price is skipped.
+            continue
+        fuels.append({'name': fuel_name, 'price': fuel_price})
+
+    station = {
+        'id': station_id,
+        'station_metric_basename': station_metric_basename,
+        'fuels': fuels
+    }
+    return station
+
+
+if __name__ == "__main__":
+    station = scrape_station(21907)
+    print(station)