diff --git a/gas-station-exporter.py b/gas-station-exporter.py
index b009b01..43015b2 100644
--- a/gas-station-exporter.py
+++ b/gas-station-exporter.py
@@ -20,6 +20,7 @@
 exporter_prefix = "gas_"
 stations_ids = (20153, 21907, 183433, 159416, 30856, 16362, 12634)
 request_count = 0
+scrape_healthy = True
 startTime = datetime.now()
 station_metrics = list()
 mutex = threading.Lock()
@@ -37,6 +38,8 @@ class RequestHandler(BaseHTTPRequestHandler):
         self.end_headers()
         self.wfile.write(bytes(exporter_prefix + "expoter_duration_seconds_sum " + str(int((datetime.now() - startTime).total_seconds())) + "\n", "utf-8"))
         self.wfile.write(bytes(exporter_prefix + "exporter_request_count " + str(request_count) + "\n", "utf-8"))
+        # Prometheus exposition values must be numeric, so expose the bool as 0/1
+        self.wfile.write(bytes(exporter_prefix + "exporter_scrape_healthy " + str(int(scrape_healthy)) + "\n", "utf-8"))
 
         for metric in station_metrics:
             #print(metric)
@@ -69,15 +71,22 @@ def update_metrics():
         print("Scrape")
         global station_metrics
         global mutex
+        global scrape_healthy
 
         mutex.acquire()
+        scrape_healthy = True
         station_metrics.clear()
         for station_id in stations_ids:
-            station_data = station_scraper.scrape_station(station_id)
-            #print(station_data)
-            for fuel in station_data['fuels']:
-                #print(fuel)
-                station_metrics.append(station_data['station_metric_basename'] + "_" + fuel['name'] + " " + str(fuel['price']))
+            try:
+                station_data = station_scraper.scrape_station(station_id)
+                #print(station_data)
+                for fuel in station_data['fuels']:
+                    #print(fuel)
+                    station_metrics.append(station_data['station_metric_basename'] + "_" + fuel['name'] + " " + str(fuel['price']))
+            except Exception as ex:
+                # Keep scraping the remaining stations, but flag the scrape as unhealthy
+                print("scrape error: " + str(ex))
+                scrape_healthy = False
 
         mutex.release()
         time.sleep(300)
diff --git a/station_scraper.py b/station_scraper.py
index a2213d9..f597c1f 100644
--- a/station_scraper.py
+++ b/station_scraper.py
@@ -21,6 +21,8 @@ def make_soup(url):
     #print(user_agent)
     http = urllib3.PoolManager(10, headers=user_agent)
     r = http.request("GET", url)
+    if r.status != 200:
+        raise FileNotFoundError("http error code " + str(r.status) + " for " + url)
     return BeautifulSoup(r.data,'lxml')
 
 def scrape_station(station_id):