first working state
This commit is contained in:
parent
09fa068424
commit
0f4780e5b6
|
@ -0,0 +1,108 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||||
|
Date of creation: 2022/10/23
|
||||||
|
Date of last modification: 2022/10/23
|
||||||
|
|
||||||
|
pip install lxml beautifulsoup4 urllib3 unidecode
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib.parse import urlsplit, parse_qs
|
||||||
|
from random import randrange
|
||||||
|
import station_scraper
|
||||||
|
|
||||||
|
# Address and TCP port the exporter's HTTP server binds to.
hostName = "10.10.3.1"
serverPort = 29816

# Prefix put in front of every metric name the exporter writes.
exporter_prefix = "gas_"

# Station ids that are scraped on every update cycle.
stations_ids = (20153, 21907, 183433, 159416, 30856, 16362, 12634)

# Number of GET requests handled so far (incremented in do_GET).
request_count = 0
# Moment this module was loaded; used to report how long the exporter runs.
startTime = datetime.now()
# Metric lines ("<name> <value>") produced by the most recent scrape.
station_metrics = []
# Guards station_metrics, which the HTTP handler and the scraper thread share.
mutex = threading.Lock()
|
||||||
|
|
||||||
|
class RequestHandler(BaseHTTPRequestHandler):
    """HTTP handler serving the metrics endpoint and a small landing page.

    The handler works on the module-level globals ``request_count``,
    ``station_metrics``, ``exporter_prefix``, ``startTime`` and ``mutex``.
    """

    def get_metrics(self):
        """Answer the current request with all exporter and station metrics.

        The station metrics are copied while holding ``mutex``; the lock is
        released again before anything is written to the client. A slow or
        broken connection can therefore neither block the scraper thread nor
        leave the lock held when a write raises.
        """
        uptime_seconds = int((datetime.now() - startTime).total_seconds())
        lines = [
            # NOTE(review): "expoter" looks like a typo for "exporter"; the
            # emitted name is kept unchanged so existing queries keep working.
            exporter_prefix + "expoter_duration_seconds_sum " + str(uptime_seconds),
            exporter_prefix + "exporter_request_count " + str(request_count),
        ]
        with mutex:
            lines.extend(exporter_prefix + metric for metric in station_metrics)
        body = "".join(line + "\n" for line in lines).encode("utf-8")

        self.send_response(200)
        # Content type of the Prometheus text exposition format.
        self.send_header("Content-type", "text/plain; version=0.0.4; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Handle a GET request.

        Paths starting with ``/metrics`` are answered by ``get_metrics``;
        every other path receives the static HTML landing page. Each request
        increments the global ``request_count``.
        """
        global request_count
        request_count += 1
        print("Request: " + self.path)

        if self.path.startswith("/metrics"):
            self.get_metrics()
            return

        page = (
            "<html>"
            "<head><title>gas station exporter</title></head>"
            "<body>"
            '<h1>gas station exporter based on data from <a href="https://www.clever-tanken.de/">https://www.clever-tanken.de/</a></h1>'
            '<p><a href="/metrics">Metrics</a></p>'
            '<p>obtain station id from <a href="https://www.clever-tanken.de/tankstelle_details/3569">https://www.clever-tanken.de/tankstelle_details/3569</a></p>'
            "</body>"
            "</html>"
        )
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(bytes(page, "utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def update_metrics(interval_seconds=300):
    """Refresh the global ``station_metrics`` list in an endless loop.

    Every cycle scrapes all ids in ``stations_ids`` into a local list and
    only then swaps the result into ``station_metrics`` while holding
    ``mutex``. The lock is therefore never held during network access, and
    readers always see the complete result of one cycle.

    A station whose scrape raises is reported on stdout and left out of that
    cycle; the loop itself keeps running.

    Args:
        interval_seconds: Pause between two scrape cycles, in seconds.
    """
    while True:
        print("Scrape")
        fresh_metrics = []

        for station_id in stations_ids:
            try:
                station_data = station_scraper.scrape_station(station_id)
                station_lines = [
                    station_data['station_metric_basename'] + "_" + fuel['name'] + " " + str(fuel['price'])
                    for fuel in station_data['fuels']
                ]
            except Exception as error:
                # Boundary of the worker thread: one failing station must not
                # end the thread and with it all future updates.
                print("Scraping station " + str(station_id) + " failed: " + repr(error))
                continue
            fresh_metrics.extend(station_lines)

        with mutex:
            # Replace the content in place so the shared list object stays the same.
            station_metrics[:] = fresh_metrics

        time.sleep(interval_seconds)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Start the scraper thread and serve HTTP until interrupted with Ctrl+C."""
    print("start")

    webServer = HTTPServer((hostName, serverPort), RequestHandler)
    print("Server started http://%s:%s" % (hostName, serverPort))

    # update_metrics() never returns. As a daemon thread it ends together with
    # the main thread; joining it would block the shutdown forever.
    update_metrics_thread = threading.Thread(target=update_metrics, args=(), daemon=True)
    update_metrics_thread.start()

    try:
        webServer.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket on every way out of serve_forever().
        webServer.server_close()
        print("Server stopped.")


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,22 @@
|
||||||
|
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||||
|
Date of creation: 2022/05/31
|
||||||
|
Date of last modification: 2022/05/31
|
||||||
|
"""
|
||||||
|
import random
|
||||||
|
|
||||||
|
def get_random_user_agent():
    """Return one User-Agent string picked at random from a fixed pool."""
    known_agents = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
        "Dalvik/2.1.0 (Linux; U; Android 10; M2006C3MI MIUI/V12.0.15.0.QCRINXM)",
    )
    return random.choice(known_agents)
|
|
@ -0,0 +1,73 @@
|
||||||
|
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||||
|
Date of creation: 2022/10/25
|
||||||
|
Date of last modification: 2022/10/25
|
||||||
|
"""
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import datetime
|
||||||
|
import urllib3
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import unidecode
|
||||||
|
import helper
|
||||||
|
|
||||||
|
# Suppress urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): no request in this file visibly disables certificate
# verification - confirm that this suppression is still needed.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
def make_soup(url, timeout=15.0):
    """Download *url* and return the page parsed by BeautifulSoup with lxml.

    The request is sent with a randomly chosen User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the connection and for each read, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The parsed document as a ``BeautifulSoup`` object.

    Raises:
        urllib3.exceptions.HTTPError: If the request fails or the server
            answers with a status other than 200.
    """
    user_agent = {'user-agent': helper.get_random_user_agent()}

    # The pool serves exactly one request; leaving the with-block releases
    # its connections instead of keeping them around per call.
    with urllib3.PoolManager(10, headers=user_agent, timeout=timeout) as http:
        response = http.request("GET", url)

    if response.status != 200:
        # An error page has none of the expected elements; fail with the real cause.
        raise urllib3.exceptions.HTTPError(
            "GET " + url + " returned HTTP status " + str(response.status)
        )
    return BeautifulSoup(response.data, 'lxml')
|
||||||
|
|
||||||
|
def _metric_name_part(text):
    """Transliterate *text* to ASCII and keep only letters, digits and underscores."""
    return re.sub(r'[^A-Za-z0-9_]', '', unidecode.unidecode(text))


def scrape_station(station_id):
    """Scrape name, address and fuel prices of one station.

    Args:
        station_id: Numeric id appended to the ``tankstelle_details`` URL.

    Returns:
        A dict with the keys ``'id'`` (the given id),
        ``'station_metric_basename'`` (name, street, postal code and country
        joined by ``_`` and reduced to characters valid in a metric name) and
        ``'fuels'`` (list of ``{'name': str, 'price': float}`` dicts).

    Raises:
        AttributeError: If one of the name/address spans is missing from the
            page (``soup.find`` then yields ``None``).
    """
    url = "https://www.clever-tanken.de/tankstelle_details/" + str(station_id)
    soup = make_soup(url)

    address_fields = (
        'name',
        'streetAddress',
        'http://schema.org/postalCode',
        'http://schema.org/addressCountry',
    )
    # Clean the station name with the same rule as the fuel names, so that
    # hyphens, slashes, newlines etc. cannot end up in a metric name.
    station_metric_basename = _metric_name_part(
        "_".join(soup.find("span", {"itemprop": field}).text for field in address_fields)
    )

    # Matches every id containing "current-price"; this is what the former
    # pattern 'current-price-*' effectively matched as well.
    price_id = re.compile(r'current-price')

    fuels = []
    for row in soup.find_all("div", class_="price-row row d-flex align-items-center"):
        try:
            fuel_name = _metric_name_part(row.div.div.text)
            listed_price = float(row.find("span", {"id": price_id}).text)
        except (AttributeError, KeyError, ValueError):
            # Rows without a name or without a numeric price are skipped on
            # purpose; one odd row must not discard the whole station.
            continue
        # NOTE(review): presumably the page shows the price without the
        # trailing 9/10 cent that is added here - confirm against the site.
        fuels.append({'name': fuel_name, 'price': round(listed_price + 0.009, 3)})

    return {
        'id': station_id,
        'station_metric_basename': station_metric_basename,
        'fuels': fuels,
    }


if __name__ == "__main__":
    station = scrape_station(21907)
    print(station)
|
||||||
|
|
Loading…
Reference in New Issue