parallel scrape and fix metrics
This commit is contained in:
@ -268,15 +268,29 @@ def scrape_listing(url):
|
||||
return None
|
||||
|
||||
|
||||
def prefetch_listings_thread(session_id):
|
||||
"""Background thread to prefetch all listings"""
|
||||
def scrape_listing_wrapper(session_id, url, results, index):
|
||||
"""Wrapper for scraping listing in thread"""
|
||||
session = scrape_sessions.get(session_id)
|
||||
if not session:
|
||||
return
|
||||
|
||||
print(f"Starting prefetch for session {session_id}")
|
||||
listing = scrape_listing(url)
|
||||
results[index] = listing
|
||||
|
||||
for i, url in enumerate(session["urls"]):
|
||||
|
||||
def prefetch_listings_thread(session_id):
|
||||
"""Background thread to prefetch all listings with parallel workers"""
|
||||
session = scrape_sessions.get(session_id)
|
||||
if not session:
|
||||
return
|
||||
urls = session["urls"]
|
||||
max_workers = random.randrange(2, 8)
|
||||
|
||||
print(
|
||||
f"Starting prefetch for session {session_id} with {max_workers} parallel workers"
|
||||
)
|
||||
|
||||
for i in range(0, len(urls), max_workers):
|
||||
# Check if session was cancelled or deleted
|
||||
if (
|
||||
session_id not in scrape_sessions
|
||||
@ -285,15 +299,35 @@ def prefetch_listings_thread(session_id):
|
||||
print(f"Prefetch stopped for session {session_id}")
|
||||
return
|
||||
|
||||
listing = scrape_listing(url)
|
||||
if listing:
|
||||
session["listings"].append(listing)
|
||||
session["scraped"] += 1
|
||||
time.sleep(0.3) # Rate limiting
|
||||
# Process batch of URLs in parallel
|
||||
batch = urls[i : i + max_workers]
|
||||
threads = []
|
||||
results = [None] * len(batch)
|
||||
|
||||
print(
|
||||
f"Prefetch complete for session {session_id}: {len(session['listings'])} listings"
|
||||
)
|
||||
for j, url in enumerate(batch):
|
||||
thread = threading.Thread(
|
||||
target=scrape_listing_wrapper,
|
||||
args=(session_id, url, results, j),
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
threads.append(thread)
|
||||
|
||||
# Wait for all threads in this batch to complete
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
# Add results to session
|
||||
for listing in results:
|
||||
if listing:
|
||||
session["listings"].append(listing)
|
||||
|
||||
session["scraped"] += len(batch)
|
||||
|
||||
# Rate limiting between batches
|
||||
time.sleep(0.5)
|
||||
|
||||
print(f"Prefetch complete for session {session_id}")
|
||||
|
||||
|
||||
@app.route("/api/search", methods=["POST"])
|
||||
@ -416,16 +450,61 @@ def api_metrics():
|
||||
|
||||
uptime = time.time() - app_start_time
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"search_requests_total": metrics["search_requests"],
|
||||
"scrape_requests_total": metrics["scrape_requests"],
|
||||
"uptime_seconds": uptime,
|
||||
"kleinanzeigen_response_codes": metrics["kleinanzeigen_response_codes"],
|
||||
"nominatim_response_codes": metrics["nominatim_response_codes"],
|
||||
"active_sessions": len(scrape_sessions),
|
||||
"cache_size": len(zip_cache),
|
||||
}
|
||||
# Build Prometheus text format
|
||||
lines = []
|
||||
|
||||
# Search requests
|
||||
lines.append("# HELP search_requests_total Total number of search requests")
|
||||
lines.append("# TYPE search_requests_total counter")
|
||||
lines.append(f"search_requests_total {metrics['search_requests']}")
|
||||
lines.append("")
|
||||
|
||||
# Scrape requests
|
||||
lines.append("# HELP scrape_requests_total Total number of scrape requests")
|
||||
lines.append("# TYPE scrape_requests_total counter")
|
||||
lines.append(f"scrape_requests_total {metrics['scrape_requests']}")
|
||||
lines.append("")
|
||||
|
||||
# Uptime
|
||||
lines.append("# HELP uptime_seconds Application uptime in seconds")
|
||||
lines.append("# TYPE uptime_seconds gauge")
|
||||
lines.append(f"uptime_seconds {uptime}")
|
||||
lines.append("")
|
||||
|
||||
# Active sessions
|
||||
lines.append("# HELP active_sessions Number of active scraping sessions")
|
||||
lines.append("# TYPE active_sessions gauge")
|
||||
lines.append(f"active_sessions {len(scrape_sessions)}")
|
||||
lines.append("")
|
||||
|
||||
# Cache size
|
||||
lines.append("# HELP cache_size Number of cached ZIP codes")
|
||||
lines.append("# TYPE cache_size gauge")
|
||||
lines.append(f"zip_code_cache_size {len(zip_cache)}")
|
||||
lines.append("")
|
||||
|
||||
# Kleinanzeigen response codes
|
||||
lines.append(
|
||||
"# HELP kleinanzeigen_http_responses_total HTTP responses from kleinanzeigen.de"
|
||||
)
|
||||
lines.append("# TYPE kleinanzeigen_http_responses_total counter")
|
||||
for code, count in metrics["kleinanzeigen_response_codes"].items():
|
||||
lines.append(f'kleinanzeigen_http_responses_total{{code="{code}"}} {count}')
|
||||
lines.append("")
|
||||
|
||||
# Nominatim response codes
|
||||
lines.append(
|
||||
"# HELP nominatim_http_responses_total HTTP responses from Nominatim API"
|
||||
)
|
||||
lines.append("# TYPE nominatim_http_responses_total counter")
|
||||
for code, count in metrics["nominatim_response_codes"].items():
|
||||
lines.append(f'nominatim_http_responses_total{{code="{code}"}} {count}')
|
||||
lines.append("")
|
||||
|
||||
return (
|
||||
"\n".join(lines),
|
||||
200,
|
||||
{"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -1598,5 +1598,433 @@
|
||||
"53175": {
|
||||
"lat": 50.6989638,
|
||||
"lon": 7.1445107
|
||||
},
|
||||
"78467": {
|
||||
"lat": 47.6929555,
|
||||
"lon": 9.1513759
|
||||
},
|
||||
"48703": {
|
||||
"lat": 52.0035321,
|
||||
"lon": 6.9517971
|
||||
},
|
||||
"46049": {
|
||||
"lat": 51.4725211,
|
||||
"lon": 6.8311577
|
||||
},
|
||||
"48143": {
|
||||
"lat": 51.9604439,
|
||||
"lon": 7.6262442
|
||||
},
|
||||
"06231": {
|
||||
"lat": 51.2849151,
|
||||
"lon": 12.1146298
|
||||
},
|
||||
"33332": {
|
||||
"lat": 51.8972222,
|
||||
"lon": 8.4006525
|
||||
},
|
||||
"27283": {
|
||||
"lat": 52.9410676,
|
||||
"lon": 9.2354716
|
||||
},
|
||||
"10317": {
|
||||
"lat": 52.4986204,
|
||||
"lon": 13.4838382
|
||||
},
|
||||
"01640": {
|
||||
"lat": 51.1331059,
|
||||
"lon": 13.5656911
|
||||
},
|
||||
"46244": {
|
||||
"lat": 51.5984773,
|
||||
"lon": 6.9123203
|
||||
},
|
||||
"01796": {
|
||||
"lat": 50.9470409,
|
||||
"lon": 13.9505572
|
||||
},
|
||||
"32339": {
|
||||
"lat": 52.3741653,
|
||||
"lon": 8.6212978
|
||||
},
|
||||
"50181": {
|
||||
"lat": 51.0144705,
|
||||
"lon": 6.5569525
|
||||
},
|
||||
"93055": {
|
||||
"lat": 49.007933,
|
||||
"lon": 12.1608121
|
||||
},
|
||||
"18147": {
|
||||
"lat": 54.1309902,
|
||||
"lon": 12.1196962
|
||||
},
|
||||
"49504": {
|
||||
"lat": 52.2991515,
|
||||
"lon": 7.9218375
|
||||
},
|
||||
"60318": {
|
||||
"lat": 50.1246887,
|
||||
"lon": 8.6865254
|
||||
},
|
||||
"96052": {
|
||||
"lat": 49.9117586,
|
||||
"lon": 10.8880355
|
||||
},
|
||||
"29559": {
|
||||
"lat": 52.8774192,
|
||||
"lon": 10.6061272
|
||||
},
|
||||
"10115": {
|
||||
"lat": 52.5319487,
|
||||
"lon": 13.3837943
|
||||
},
|
||||
"27251": {
|
||||
"lat": 52.7492339,
|
||||
"lon": 8.7757762
|
||||
},
|
||||
"22303": {
|
||||
"lat": 53.5897407,
|
||||
"lon": 10.0234361
|
||||
},
|
||||
"38122": {
|
||||
"lat": 52.2297328,
|
||||
"lon": 10.4745918
|
||||
},
|
||||
"51371": {
|
||||
"lat": 51.0590744,
|
||||
"lon": 6.9417484
|
||||
},
|
||||
"60314": {
|
||||
"lat": 50.1166698,
|
||||
"lon": 8.7334387
|
||||
},
|
||||
"70376": {
|
||||
"lat": 48.818393,
|
||||
"lon": 9.2066864
|
||||
},
|
||||
"93499": {
|
||||
"lat": 49.1429872,
|
||||
"lon": 12.7164157
|
||||
},
|
||||
"18435": {
|
||||
"lat": 54.32997,
|
||||
"lon": 13.0649961
|
||||
},
|
||||
"12105": {
|
||||
"lat": 52.4484553,
|
||||
"lon": 13.3722304
|
||||
},
|
||||
"81929": {
|
||||
"lat": 48.1606494,
|
||||
"lon": 11.6631075
|
||||
},
|
||||
"45768": {
|
||||
"lat": 51.6575564,
|
||||
"lon": 7.0659333
|
||||
},
|
||||
"91074": {
|
||||
"lat": 49.5738171,
|
||||
"lon": 10.8926968
|
||||
},
|
||||
"49593": {
|
||||
"lat": 52.5674997,
|
||||
"lon": 7.9325832
|
||||
},
|
||||
"80935": {
|
||||
"lat": 48.1997053,
|
||||
"lon": 11.5552742
|
||||
},
|
||||
"52134": {
|
||||
"lat": 50.8605761,
|
||||
"lon": 6.1001816
|
||||
},
|
||||
"94535": {
|
||||
"lat": 48.7110796,
|
||||
"lon": 13.2553681
|
||||
},
|
||||
"99947": {
|
||||
"lat": 51.1242532,
|
||||
"lon": 10.6769762
|
||||
},
|
||||
"09112": {
|
||||
"lat": 50.830933,
|
||||
"lon": 12.9053458
|
||||
},
|
||||
"01968": {
|
||||
"lat": 51.5238377,
|
||||
"lon": 14.0284911
|
||||
},
|
||||
"31515": {
|
||||
"lat": 52.4314053,
|
||||
"lon": 9.428236
|
||||
},
|
||||
"40547": {
|
||||
"lat": 51.2441486,
|
||||
"lon": 6.7400785
|
||||
},
|
||||
"72800": {
|
||||
"lat": 48.4830973,
|
||||
"lon": 9.2728039
|
||||
},
|
||||
"81476": {
|
||||
"lat": 48.0873869,
|
||||
"lon": 11.4957046
|
||||
},
|
||||
"94034": {
|
||||
"lat": 48.593963,
|
||||
"lon": 13.449846
|
||||
},
|
||||
"84478": {
|
||||
"lat": 48.1977065,
|
||||
"lon": 12.4064772
|
||||
},
|
||||
"69120": {
|
||||
"lat": 49.4197028,
|
||||
"lon": 8.7013385
|
||||
},
|
||||
"16303": {
|
||||
"lat": 53.0795487,
|
||||
"lon": 14.2322027
|
||||
},
|
||||
"48165": {
|
||||
"lat": 51.8982648,
|
||||
"lon": 7.650382
|
||||
},
|
||||
"23554": {
|
||||
"lat": 53.889632,
|
||||
"lon": 10.6772133
|
||||
},
|
||||
"57648": {
|
||||
"lat": 50.6553129,
|
||||
"lon": 7.9089968
|
||||
},
|
||||
"50677": {
|
||||
"lat": 50.9222793,
|
||||
"lon": 6.9491251
|
||||
},
|
||||
"26826": {
|
||||
"lat": 53.165921,
|
||||
"lon": 7.3277997
|
||||
},
|
||||
"24340": {
|
||||
"lat": 54.4684418,
|
||||
"lon": 9.7984274
|
||||
},
|
||||
"25335": {
|
||||
"lat": 53.7556754,
|
||||
"lon": 9.6072404
|
||||
},
|
||||
"89160": {
|
||||
"lat": 48.4795088,
|
||||
"lon": 9.9097371
|
||||
},
|
||||
"51580": {
|
||||
"lat": 50.955823,
|
||||
"lon": 7.6952729
|
||||
},
|
||||
"59075": {
|
||||
"lat": 51.706537,
|
||||
"lon": 7.7471066
|
||||
},
|
||||
"28355": {
|
||||
"lat": 53.1001424,
|
||||
"lon": 8.9369005
|
||||
},
|
||||
"10961": {
|
||||
"lat": 52.492375,
|
||||
"lon": 13.3969612
|
||||
},
|
||||
"33649": {
|
||||
"lat": 51.9812735,
|
||||
"lon": 8.4631941
|
||||
},
|
||||
"01945": {
|
||||
"lat": 51.426725,
|
||||
"lon": 13.8800707
|
||||
},
|
||||
"40225": {
|
||||
"lat": 51.1952407,
|
||||
"lon": 6.7930966
|
||||
},
|
||||
"83043": {
|
||||
"lat": 47.8653219,
|
||||
"lon": 12.0086382
|
||||
},
|
||||
"01279": {
|
||||
"lat": 51.0279271,
|
||||
"lon": 13.8224355
|
||||
},
|
||||
"88348": {
|
||||
"lat": 48.0132718,
|
||||
"lon": 9.5038216
|
||||
},
|
||||
"57078": {
|
||||
"lat": 50.9241982,
|
||||
"lon": 7.9979802
|
||||
},
|
||||
"72160": {
|
||||
"lat": 48.4524183,
|
||||
"lon": 8.6624266
|
||||
},
|
||||
"49716": {
|
||||
"lat": 52.6985078,
|
||||
"lon": 7.2503852
|
||||
},
|
||||
"24111": {
|
||||
"lat": 54.3043198,
|
||||
"lon": 10.0647871
|
||||
},
|
||||
"09116": {
|
||||
"lat": 50.8205765,
|
||||
"lon": 12.8734753
|
||||
},
|
||||
"63450": {
|
||||
"lat": 50.1285671,
|
||||
"lon": 8.9252343
|
||||
},
|
||||
"64285": {
|
||||
"lat": 49.8517954,
|
||||
"lon": 8.6583914
|
||||
},
|
||||
"46399": {
|
||||
"lat": 51.8767165,
|
||||
"lon": 6.592176
|
||||
},
|
||||
"50823": {
|
||||
"lat": 50.9508203,
|
||||
"lon": 6.9259111
|
||||
},
|
||||
"51702": {
|
||||
"lat": 51.0304049,
|
||||
"lon": 7.6756018
|
||||
},
|
||||
"26129": {
|
||||
"lat": 53.1529595,
|
||||
"lon": 8.1751768
|
||||
},
|
||||
"22391": {
|
||||
"lat": 53.6423048,
|
||||
"lon": 10.081893
|
||||
},
|
||||
"41472": {
|
||||
"lat": 51.1601804,
|
||||
"lon": 6.654715
|
||||
},
|
||||
"76199": {
|
||||
"lat": 48.9755465,
|
||||
"lon": 8.4040415
|
||||
},
|
||||
"35043": {
|
||||
"lat": 50.7979432,
|
||||
"lon": 8.8227218
|
||||
},
|
||||
"65929": {
|
||||
"lat": 50.0944874,
|
||||
"lon": 8.5308675
|
||||
},
|
||||
"27308": {
|
||||
"lat": 52.9255206,
|
||||
"lon": 9.3782295
|
||||
},
|
||||
"99510": {
|
||||
"lat": 51.0351265,
|
||||
"lon": 11.4866204
|
||||
},
|
||||
"94315": {
|
||||
"lat": 48.8839157,
|
||||
"lon": 12.5955773
|
||||
},
|
||||
"69126": {
|
||||
"lat": 49.3773204,
|
||||
"lon": 8.7015986
|
||||
},
|
||||
"14193": {
|
||||
"lat": 52.4813456,
|
||||
"lon": 13.2384701
|
||||
},
|
||||
"04318": {
|
||||
"lat": 51.3431283,
|
||||
"lon": 12.4282967
|
||||
},
|
||||
"48161": {
|
||||
"lat": 51.9892494,
|
||||
"lon": 7.5383949
|
||||
},
|
||||
"35683": {
|
||||
"lat": 50.7423221,
|
||||
"lon": 8.2847449
|
||||
},
|
||||
"42477": {
|
||||
"lat": 51.2100192,
|
||||
"lon": 7.3649391
|
||||
},
|
||||
"48317": {
|
||||
"lat": 51.8011531,
|
||||
"lon": 7.7434268
|
||||
},
|
||||
"10999": {
|
||||
"lat": 52.4976589,
|
||||
"lon": 13.4231017
|
||||
},
|
||||
"88260": {
|
||||
"lat": 47.7032598,
|
||||
"lon": 9.9431795
|
||||
},
|
||||
"72760": {
|
||||
"lat": 48.5120972,
|
||||
"lon": 9.2052416
|
||||
},
|
||||
"82467": {
|
||||
"lat": 47.4902875,
|
||||
"lon": 11.0332252
|
||||
},
|
||||
"44319": {
|
||||
"lat": 51.5383021,
|
||||
"lon": 7.6017367
|
||||
},
|
||||
"12524": {
|
||||
"lat": 52.4118113,
|
||||
"lon": 13.5481684
|
||||
},
|
||||
"99428": {
|
||||
"lat": 50.9720626,
|
||||
"lon": 11.2029566
|
||||
},
|
||||
"86695": {
|
||||
"lat": 48.6048016,
|
||||
"lon": 10.8196538
|
||||
},
|
||||
"04177": {
|
||||
"lat": 51.3425514,
|
||||
"lon": 12.330756
|
||||
},
|
||||
"50735": {
|
||||
"lat": 50.9893938,
|
||||
"lon": 6.9609471
|
||||
},
|
||||
"53909": {
|
||||
"lat": 50.692835,
|
||||
"lon": 6.6581295
|
||||
},
|
||||
"50169": {
|
||||
"lat": 50.8807881,
|
||||
"lon": 6.7426581
|
||||
},
|
||||
"89584": {
|
||||
"lat": 48.2846841,
|
||||
"lon": 9.6586816
|
||||
},
|
||||
"47179": {
|
||||
"lat": 51.5247327,
|
||||
"lon": 6.7297793
|
||||
},
|
||||
"76287": {
|
||||
"lat": 48.9632556,
|
||||
"lon": 8.310033
|
||||
},
|
||||
"74072": {
|
||||
"lat": 49.1394593,
|
||||
"lon": 9.2148992
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user