#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Flask API Server for Kleinanzeigen Scraper
Author: Hendrik Schutter
Date: 2025/11/24

Workflow exposed over HTTP:

1. ``POST /api/search`` collects listing URLs for a search term and stores
   them in an in-memory session.
2. ``GET /api/scrape/<session_id>`` scrapes exactly one listing per call so
   the client can show progress and cancel at any time.
3. ``POST /api/scrape/<session_id>/cancel`` and
   ``GET /api/scrape/<session_id>/results`` return what was collected so far.

ZIP codes are geocoded through Nominatim and cached in ``CACHE_FILE``.
"""

from datetime import datetime
import json
import os
import random
import time
import uuid
from urllib.parse import quote

from bs4 import BeautifulSoup
from flask import Flask, request, jsonify
from flask_cors import CORS
import requests
import urllib3

app = Flask(__name__)
CORS(app)

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# ZIP code cache file (resolved relative to the current working directory)
CACHE_FILE = "zip_cache.json"
zip_cache = {}

# Active scrape sessions, keyed by session id
scrape_sessions = {}

SESSION_TIMEOUT = 300  # seconds without any access before a session is dropped
REQUEST_TIMEOUT = 10  # seconds; upper bound for every outgoing HTTP request

# One shared connection pool instead of a new PoolManager per request.
_http = urllib3.PoolManager(10)


def cleanup_old_sessions():
    """Remove sessions that have been idle longer than SESSION_TIMEOUT.

    A session's age is measured from its last access ("last_access"), falling
    back to "created_at" for sessions that do not carry that key. Measuring
    from creation alone would expire a long-running scrape while the client is
    still polling it.

    Returns:
        Number of sessions that were removed.
    """
    current_time = time.time()
    sessions_to_remove = [
        session_id
        for session_id, session in scrape_sessions.items()
        if current_time
        - session.get("last_access", session.get("created_at", current_time))
        > SESSION_TIMEOUT
    ]

    for session_id in sessions_to_remove:
        del scrape_sessions[session_id]
        print(f"Cleaned up old session: {session_id}")

    return len(sessions_to_remove)


def get_random_user_agent():
    """Return a randomly chosen browser user agent string."""
    uastrings = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    ]
    return random.choice(uastrings)


def make_soup(url):
    """Fetch *url* with a random user agent and return it parsed by lxml.

    Raises:
        urllib3.exceptions.HTTPError: on connection problems or timeout.
    """
    response = _http.request(
        "GET",
        url,
        headers={"user-agent": get_random_user_agent()},
        timeout=REQUEST_TIMEOUT,
    )
    return BeautifulSoup(response.data, "lxml")


def _normalize_coords(value):
    """Convert a cache/API entry to {"lat": float, "lon": float}, or None.

    Accepts both the dict form ({"lat": ..., "lon": ...}) and the legacy
    two-element list form ([lat, lon]) that older cache files contain.
    """
    try:
        if isinstance(value, dict):
            return {"lat": float(value["lat"]), "lon": float(value["lon"])}
        if isinstance(value, (list, tuple)) and len(value) == 2:
            return {"lat": float(value[0]), "lon": float(value[1])}
    except (KeyError, TypeError, ValueError):
        return None
    return None


def load_zip_cache(path=CACHE_FILE):
    """Load the ZIP cache from *path* into the module-level ``zip_cache``.

    The dict is updated in place so every function that already holds a
    reference to ``zip_cache`` sees the loaded data. Unreadable files and
    malformed entries are skipped instead of aborting startup.

    Returns:
        Number of entries in the cache after loading.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            raw = json.load(f)
    except (OSError, ValueError) as e:
        print(f"Could not load ZIP cache {path}: {e}")
        return len(zip_cache)

    if isinstance(raw, dict):
        zip_cache.clear()
        for zip_code, value in raw.items():
            coords = _normalize_coords(value)
            if coords is not None:
                zip_cache[str(zip_code)] = coords

    return len(zip_cache)


def _save_zip_cache(path=CACHE_FILE):
    """Persist ``zip_cache`` to *path*; a failed write is logged, not raised."""
    try:
        with open(path, "w", encoding="utf-8") as f:
            json.dump(zip_cache, f, ensure_ascii=False, indent=2)
    except OSError as e:
        print(f"Could not write ZIP cache {path}: {e}")


def geocode_zip(zip_code):
    """Geocode a German ZIP code using the Nominatim API with caching.

    Returns:
        {"lat": float, "lon": float}, or None when the ZIP code could not be
        resolved or the request failed.
    """
    zip_code = str(zip_code)

    # Check cache first; legacy [lat, lon] entries are upgraded on access.
    if zip_code in zip_cache:
        cached = _normalize_coords(zip_cache[zip_code])
        if cached is not None:
            zip_cache[zip_code] = cached
            return cached

    # Call Nominatim API
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "postalcode": zip_code,
        "country": "Germany",
        "format": "json",
        "limit": 1,
    }

    coords = None
    try:
        # NOTE(review): Nominatim's usage policy asks for an identifying
        # User-Agent; a random browser string may get blocked - confirm.
        response = requests.get(
            url,
            params=params,
            headers={"user-agent": get_random_user_agent()},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()
        if data:
            coords = _normalize_coords(data[0])
    except (
        requests.RequestException,
        ValueError,
        KeyError,
        IndexError,
        TypeError,
    ) as e:
        print(f"Geocoding error for {zip_code}: {e}")
    finally:
        # Respect API rate limits after every request, not only successful ones
        time.sleep(1)

    if coords is None:
        return None

    zip_cache[zip_code] = coords
    _save_zip_cache()
    return coords


def search_listings(search_term, max_pages, min_price, max_price):
    """Search for listings on kleinanzeigen.de - returns only URLs.

    Pages are fetched from 1 up to *max_pages*; the loop stops early at the
    first page without results or on the first page that fails to load.

    Returns:
        List of unique absolute listing URLs in the order they were found.
    """
    base_url = "https://www.kleinanzeigen.de"
    # dict keys de-duplicate while keeping the order of the result pages
    found_listings = {}
    # Percent-encode the term so "/", "?" or "#" cannot alter the URL path.
    slug = quote(search_term.replace(" ", "-"), safe="")

    for page_counter in range(1, max_pages + 1):
        page_url = (
            f"{base_url}/s-anbieter:privat/anzeige:angebote/preis:"
            f"{min_price}:{max_price}/seite:{page_counter}/{slug}/k0"
        )

        print(f"Scraping page {page_counter}: {page_url}")

        try:
            soup = make_soup(page_url)
            results = soup.find_all("li", class_="ad-listitem fully-clickable-card")
        except Exception as e:
            print(f"Error scraping page {page_counter}: {e}")
            break

        if not results:
            break

        for result in results:
            anchor = result.a
            href = anchor.get("href") if anchor is not None else None
            # A card without a link is skipped instead of aborting the search.
            if href:
                found_listings[base_url + href] = None

    return list(found_listings)


def scrape_listing(url):
    """Scrape individual listing details.

    Returns:
        A dict with title, price, id, zip_code, address, date_added (ISO
        string or None), image and url - plus lat/lon when the ZIP code could
        be geocoded. None when the page has no title, no ZIP code, or could
        not be fetched.
    """
    try:
        soup = make_soup(url)

        title_elem = soup.find("h1", class_="boxedarticle--title")
        if not title_elem:
            return None
        title = title_elem.text.strip()

        # Price: first whitespace-separated token with separators removed;
        # non-numeric prices (e.g. text-only) fall back to 0.
        price = 0
        price_elem = soup.find("h2", class_="boxedarticle--price")
        if price_elem:
            price_text = price_elem.text.strip().split(" ")[0]
            try:
                price = int(price_text.replace(".", "").replace(",", ""))
            except ValueError:
                price = 0

        # Listing id: second item of the flexlist, 0 when missing/non-numeric.
        listing_id = 0
        flexlist = soup.find("ul", class_="flexlist text-light-800")
        if flexlist:
            flex_items = flexlist.find_all("li", recursive=False)
            if len(flex_items) > 1:
                try:
                    listing_id = int(flex_items[1].text.strip())
                except ValueError:
                    listing_id = 0

        # Locality text is "<zip> <address>".
        zip_code = None
        address = ""
        locality = soup.find("span", id="viewad-locality")
        if locality:
            zip_code, _, address = locality.text.strip().partition(" ")

        if not zip_code:
            return None

        date_added = None
        details_divs = soup.find_all("div", class_="boxedarticle--details--full")
        if len(details_divs) > 1:
            date_span = details_divs[1].find("span")
            if date_span:
                try:
                    # strip() so surrounding whitespace does not break parsing
                    date_added = datetime.strptime(
                        date_span.text.strip(), "%d.%m.%Y"
                    )
                except ValueError:
                    date_added = None

        first_image = None
        img_elem = soup.find("div", class_="galleryimage-element current")
        if img_elem:
            img = img_elem.find("img")
            if img and img.get("src"):
                first_image = img["src"]

        listing = {
            "title": title,
            "price": price,
            "id": listing_id,
            "zip_code": zip_code,
            "address": address,
            "date_added": date_added.isoformat() if date_added else None,
            "image": first_image,
            "url": url,
        }

        # Add coordinates
        coords = geocode_zip(zip_code)
        if coords and isinstance(coords, dict):
            listing["lat"] = coords.get("lat")
            listing["lon"] = coords.get("lon")

        return listing

    except Exception as e:
        # Boundary handler: one broken listing must not fail the whole session.
        print(f"Error scraping listing {url}: {e}")
        return None


def _parse_int(value, name, minimum=0):
    """Return *value* as int >= *minimum*; raise ValueError naming *name*."""
    if isinstance(value, bool):
        raise ValueError(f"{name} must be an integer")
    try:
        number = int(value)
    except (TypeError, ValueError):
        raise ValueError(f"{name} must be an integer") from None
    if number < minimum:
        raise ValueError(f"{name} must be >= {minimum}")
    return number


@app.route("/api/search", methods=["POST"])
def api_search():
    """API endpoint for searching listings - returns only count and URLs.

    Expects a JSON object with "search_term" (required) and optional
    "max_pages", "min_price", "max_price". Responds with the new session id
    and the number of listing URLs found; 400 on invalid input.
    """
    # silent=True: a missing or malformed body yields None instead of raising
    data = request.get_json(silent=True)

    # Cleanup old sessions before creating new one
    cleanup_old_sessions()

    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    search_term = data.get("search_term", "")
    search_term = search_term.strip() if isinstance(search_term, str) else ""
    if not search_term:
        return jsonify({"error": "Search term is required"}), 400

    # These values end up in the request URL, so they must be plain integers.
    try:
        max_pages = _parse_int(data.get("max_pages", 1), "max_pages", minimum=1)
        min_price = _parse_int(data.get("min_price", 0), "min_price")
        max_price = _parse_int(data.get("max_price", 10000), "max_price")
    except ValueError as e:
        return jsonify({"error": str(e)}), 400

    try:
        # Search for listing URLs only
        listing_urls = search_listings(search_term, max_pages, min_price, max_price)

        # Create session ID
        session_id = str(uuid.uuid4())
        now = time.time()

        # Store session with creation and last-access timestamps
        scrape_sessions[session_id] = {
            "urls": listing_urls,
            "total": len(listing_urls),
            "scraped": 0,
            "listings": [],
            "cancelled": False,
            "created_at": now,
            "last_access": now,
        }

        return jsonify({"session_id": session_id, "total": len(listing_urls)})

    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/api/scrape/<session_id>", methods=["GET"])
def api_scrape(session_id):
    """API endpoint for scraping next listing in session.

    Each call scrapes one URL of the session and reports progress. Returns
    404 for unknown or expired sessions.
    """
    # Cleanup old sessions on each request
    cleanup_old_sessions()

    if session_id not in scrape_sessions:
        return jsonify({"error": "Invalid session ID"}), 404

    session = scrape_sessions[session_id]
    # Keep the session alive while the client is still polling it.
    session["last_access"] = time.time()

    if session["cancelled"]:
        return jsonify({"cancelled": True}), 200

    if session["scraped"] >= session["total"]:
        return jsonify({"complete": True, "listing": None})

    # Scrape next listing
    url = session["urls"][session["scraped"]]
    listing = scrape_listing(url)

    if listing:
        session["listings"].append(listing)

    session["scraped"] += 1

    return jsonify(
        {
            "complete": session["scraped"] >= session["total"],
            "listing": listing,
            "progress": {"current": session["scraped"], "total": session["total"]},
        }
    )


@app.route("/api/scrape/<session_id>/cancel", methods=["POST"])
def api_cancel_scrape(session_id):
    """API endpoint to cancel scraping session.

    Marks the session as cancelled and returns the listings scraped so far.
    """
    cleanup_old_sessions()

    if session_id not in scrape_sessions:
        return jsonify({"error": "Invalid session ID"}), 404

    session = scrape_sessions[session_id]
    session["cancelled"] = True
    session["last_access"] = time.time()

    return jsonify(
        {
            "cancelled": True,
            "listings": session["listings"],
            "total_scraped": len(session["listings"]),
        }
    )


@app.route("/api/scrape/<session_id>/results", methods=["GET"])
def api_get_results(session_id):
    """API endpoint to get all scraped results"""
    cleanup_old_sessions()

    if session_id not in scrape_sessions:
        return jsonify({"error": "Invalid session ID"}), 404

    session = scrape_sessions[session_id]
    session["last_access"] = time.time()

    return jsonify(
        {
            "listings": session["listings"],
            "total": len(session["listings"]),
            "progress": {"current": session["scraped"], "total": session["total"]},
        }
    )


@app.route("/api/health", methods=["GET"])
def health():
    """Health check endpoint"""
    cleanup_old_sessions()
    return jsonify(
        {
            "status": "ok",
            "cache_size": len(zip_cache),
            "active_sessions": len(scrape_sessions),
        }
    )


if __name__ == "__main__":
    print("Starting Kleinanzeigen Scraper API Server...")

    # Load cache on startup
    if os.path.exists(CACHE_FILE):
        load_zip_cache()
        print(f"Loaded {len(zip_cache)} ZIP codes from cache")

    print("ZIP code cache loaded with", len(zip_cache), "entries")

    # The server listens on all interfaces, so the interactive debugger (which
    # allows code execution) must be opt-in: set FLASK_DEBUG=1 for local work.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host="0.0.0.0", port=5000)
diff --git a/backend/zip_cache.json b/backend/zip_cache.json new file mode 100644 index 0000000..8329b26 --- /dev/null +++ b/backend/zip_cache.json @@ -0,0 +1,1374 @@ +{ + "65396": [ + 50.0493089, + 8.1528234 + ], + "31224": [ + 52.3217589, + 10.2740354 + ], + "87700": [ + 47.981622, + 10.168735 + ], + "18119": [ + 54.171753, + 12.0715803 + ], + "17235": [ + 53.3364758, + 13.0891908 + ], + "33100": [ + 51.7188767, + 8.8204875 + ], + "41564": [ + 51.219695, + 6.6018513 + ], + "85049": [ + 48.7637998, + 11.3534347 + ], + "50858": [ + 50.9241854, + 6.8594359 + ], + "53619": [ + 50.6248614, + 7.236131 + ], + "35104": [ + 51.169858, + 8.7991212 + ], + "12681": [ + 52.53836, + 13.5345701 + ], + "24796": [ + 54.3295051, + 9.843516 + ], + "38226": [ + 52.1582602, + 10.3280734 + ], + "12051": [ + 52.4666256, + 13.4291505 + ], + "63762": [ + 49.91076, + 9.0649764 + ], + "53113": [ + 50.720036, + 7.1222135 + ], + "81825": [ + 48.1180693, + 11.6617926 + ], + "60437": [ + 50.199045, + 8.6785157 + ], + "22175": [ + 53.624188, + 10.0964195 + ], + "24939": [ + 54.8039798, + 9.4199303 + ], + "87645": [ + 47.5796844, + 10.7589108 + ], + "46446": [ + 51.8554151, + 6.2334481 + ], + "55543": [ + 49.8270169, + 7.8649707 + ], + "45145": [ + 51.4466252, + 6.9759833 + ], + "80636": [ + 48.1503498, + 11.5421682 + ], + "46562": [ + 51.5996368, + 6.6553946 + ], + "21683": [ + 53.6379564, + 9.4633336 + ], + "45147": [ + 51.439, + 6.978438 + ], + "42699": [ + 51.1472454, + 7.0160744 + ], + "66606": [ + 49.4638241, + 7.1921958 + ], + "63075": [ + 50.1172347, + 8.7952867 + ], + "89233": [ + 48.38224, + 10.063212 + ], + "64750": [ + 49.7698447, + 9.0733458 + ], + "60488": [ + 50.1409814, + 8.61332 + ], + "41372": [ + 51.2028546, + 6.1488664 + ], + "79106": [ + 48.0066367, + 7.84115 + ], + "25421": [ + 53.6461272, + 9.7978295 + ], + "55120": [ + 50.0230033, + 8.226238 + ], + "93354": [ + 48.7563843, + 11.8315879 + ], + "22417": [ + 53.6667862, + 10.0394606 + ], + "45276": [ + 51.4483666, + 
7.0766091 + ], + "21614": [ + 53.4611745, + 9.6871791 + ], + "78183": [ + 47.894954, + 8.5286859 + ], + "76133": [ + 49.0145529, + 8.3869012 + ], + "30159": [ + 52.3754607, + 9.73718 + ], + "14974": [ + 52.2920028, + 13.253629 + ], + "22848": [ + 53.6702396, + 9.9647065 + ], + "65549": [ + 50.3875825, + 8.0632306 + ], + "24568": [ + 53.8409965, + 9.9604144 + ], + "51399": [ + 51.0912479, + 7.119063 + ], + "13507": [ + 52.5765704, + 13.274292 + ], + "74336": [ + 49.0817102, + 9.0820642 + ], + "28197": [ + 53.0977106, + 8.7113664 + ], + "38114": [ + 52.2821182, + 10.5057529 + ], + "73230": [ + 48.6405169, + 9.4514092 + ], + "65185": [ + 50.0754415, + 8.2409901 + ], + "63477": [ + 50.1549692, + 8.8291468 + ], + "79102": [ + 47.9867122, + 7.8606821 + ], + "44789": [ + 51.4681782, + 7.2196341 + ], + "66706": [ + 49.5020118, + 6.4292476 + ], + "39307": [ + 52.3825952, + 12.1672886 + ], + "16515": [ + 52.7711793, + 13.2878028 + ], + "53844": [ + 50.7974348, + 7.1155495 + ], + "66440": [ + 49.21398, + 7.2393898 + ], + "40764": [ + 51.107968, + 6.9485475 + ], + "72393": [ + 48.3144446, + 9.1316398 + ], + "31582": [ + 52.6392045, + 9.2177882 + ], + "64291": [ + 49.924334, + 8.6700403 + ], + "66994": [ + 49.148501, + 7.7599286 + ], + "10439": [ + 52.551628, + 13.4080206 + ], + "90482": [ + 49.4612837, + 11.1569783 + ], + "80336": [ + 48.1309411, + 11.5527628 + ], + "46397": [ + 51.8704976, + 6.6515339 + ], + "73434": [ + 48.861834, + 10.0087368 + ], + "55128": [ + 49.9803406, + 8.2359449 + ], + "56075": [ + 50.3185867, + 7.5671197 + ], + "42799": [ + 51.1097468, + 7.0699535 + ], + "60323": [ + 50.1238046, + 8.6617905 + ], + "49082": [ + 52.2467152, + 8.0611039 + ], + "71397": [ + 48.9018295, + 9.3913064 + ], + "59269": [ + 51.7595446, + 8.0495504 + ], + "58642": [ + 51.3697717, + 7.6112597 + ], + "69121": [ + 49.4355994, + 8.6909827 + ], + "99084": [ + 50.977007, + 11.0270416 + ], + "79100": [ + 47.948102, + 7.8806006 + ], + "30163": [ + 52.3985964, + 9.7468275 + ], + 
"65195": [ + 50.10668, + 8.1929797 + ], + "99427": [ + 51.007008, + 11.318925 + ], + "31632": [ + 52.5721633, + 9.2488844 + ], + "80805": [ + 48.1719894, + 11.6031233 + ], + "74575": [ + 49.3515287, + 9.9792114 + ], + "22415": [ + 53.6468629, + 10.0071084 + ], + "53879": [ + 50.6575091, + 6.7885086 + ], + "83435": [ + 47.7266717, + 12.8540813 + ], + "35080": [ + 50.7689326, + 8.47644 + ], + "88250": [ + 47.8048499, + 9.6401004 + ], + "53757": [ + 50.7699533, + 7.1900983 + ], + "91338": [ + 49.6178641, + 11.2272346 + ], + "48163": [ + 51.8913816, + 7.5748843 + ], + "50676": [ + 50.9302048, + 6.9530214 + ], + "20359": [ + 53.5513888, + 9.9652207 + ], + "48683": [ + 52.1012241, + 6.9597729 + ], + "58566": [ + 51.1451584, + 7.5867375 + ], + "47509": [ + 51.4512701, + 6.4681553 + ], + "52224": [ + 50.7367763, + 6.2868736 + ], + "30916": [ + 52.4597727, + 9.8484595 + ], + "13355": [ + 52.5410841, + 13.389616 + ], + "90408": [ + 49.4657815, + 11.0760787 + ], + "30161": [ + 52.3833242, + 9.7451797 + ], + "29574": [ + 53.027968, + 10.4238542 + ], + "68775": [ + 49.3661548, + 8.5256571 + ], + "26384": [ + 53.5380722, + 8.1441081 + ], + "45309": [ + 51.4795218, + 7.0725348 + ], + "70806": [ + 48.8659279, + 9.1851251 + ], + "81541": [ + 48.1212633, + 11.5877883 + ], + "88471": [ + 48.2284106, + 9.8458981 + ], + "73666": [ + 48.7497871, + 9.4398076 + ], + "53881": [ + 50.6318282, + 6.8138181 + ], + "31134": [ + 52.1426466, + 9.9525357 + ], + "18107": [ + 54.1509015, + 11.9998073 + ], + "96215": [ + 50.1315263, + 11.0839299 + ], + "25779": [ + 54.2778477, + 9.1499664 + ], + "91301": [ + 49.7122951, + 11.0664285 + ], + "99444": [ + 50.8463328, + 11.3542801 + ], + "57610": [ + 50.6880529, + 7.6639834 + ], + "90522": [ + 49.4236043, + 10.9708709 + ], + "16348": [ + 52.7603588, + 13.5034557 + ], + "71686": [ + 48.8824228, + 9.2656553 + ], + "94032": [ + 48.5673568, + 13.4621956 + ], + "67434": [ + 49.3395315, + 8.0767774 + ], + "13409": [ + 52.5674283, + 13.373084 + ], + "85609": [ 
+ 48.192658, + 11.7142122 + ], + "23552": [ + 53.8672062, + 10.6879337 + ], + "55116": [ + 50.0010941, + 8.2698319 + ], + "46395": [ + 51.8194111, + 6.5897644 + ], + "91257": [ + 49.7388149, + 11.5281328 + ], + "86498": [ + 48.1958025, + 10.2678327 + ], + "83071": [ + 47.8658574, + 12.1883594 + ], + "80798": [ + 48.1551839, + 11.5661602 + ], + "65428": [ + 49.9819502, + 8.446357 + ], + "65205": [ + 50.0527128, + 8.3119383 + ], + "90455": [ + 49.368, + 11.0832301 + ], + "40476": { + "lat": 51.2497334, + "lon": 6.7826416 + }, + "28203": { + "lat": 53.0745847, + "lon": 8.8255484 + }, + "30629": { + "lat": 52.3945187, + "lon": 9.856456 + }, + "26939": { + "lat": 53.3125659, + "lon": 8.3709506 + }, + "24105": { + "lat": 54.3378072, + "lon": 10.1450606 + }, + "47059": { + "lat": 51.439653, + "lon": 6.7386164 + }, + "12529": { + "lat": 52.3658691, + "lon": 13.4986104 + }, + "94469": { + "lat": 48.8477283, + "lon": 12.9726396 + }, + "49080": { + "lat": 52.2574684, + "lon": 8.032973 + }, + "12459": { + "lat": 52.468305, + "lon": 13.5147534 + }, + "85716": { + "lat": 48.2771574, + "lon": 11.5535728 + }, + "52459": { + "lat": 50.8649164, + "lon": 6.3735307 + }, + "54298": { + "lat": 49.860982, + "lon": 6.5698888 + }, + "10249": { + "lat": 52.5232023, + "lon": 13.4415068 + }, + "74321": { + "lat": 48.9536697, + "lon": 9.1277841 + }, + "06246": { + "lat": 51.3790758, + "lon": 11.8125212 + }, + "57076": { + "lat": 50.9023972, + "lon": 8.0328723 + }, + "74193": { + "lat": 49.1457847, + "lon": 9.0490182 + }, + "50672": { + "lat": 50.9425062, + "lon": 6.9345727 + }, + "36199": { + "lat": 51.0163485, + "lon": 9.739399 + }, + "92421": { + "lat": 49.3153268, + "lon": 12.0650867 + }, + "74861": { + "lat": 49.29759, + "lon": 9.2801659 + }, + "82152": { + "lat": 48.0984515, + "lon": 11.4274282 + }, + "89134": { + "lat": 48.4332154, + "lon": 9.8730389 + }, + "85375": { + "lat": 48.3222881, + "lon": 11.6696699 + }, + "50968": { + "lat": 50.9014161, + "lon": 6.9666561 + }, + "10587": { + 
"lat": 52.5179717, + "lon": 13.3188456 + }, + "44339": { + "lat": 51.5660996, + "lon": 7.4634282 + }, + "91126": { + "lat": 49.3197346, + "lon": 11.0058789 + }, + "78462": { + "lat": 47.6620359, + "lon": 9.1704389 + }, + "12277": { + "lat": 52.4154196, + "lon": 13.3781799 + }, + "80997": { + "lat": 48.1927896, + "lon": 11.4844607 + }, + "22299": { + "lat": 53.592923, + "lon": 10.0002707 + }, + "32423": { + "lat": 52.2949237, + "lon": 8.9504689 + }, + "78187": { + "lat": 47.9038998, + "lon": 8.6530838 + }, + "47228": { + "lat": 51.4172098, + "lon": 6.6995226 + }, + "91735": { + "lat": 49.1641497, + "lon": 10.7183644 + }, + "70197": { + "lat": 48.7743488, + "lon": 9.1024708 + }, + "23701": { + "lat": 54.1196445, + "lon": 10.6400269 + }, + "67067": { + "lat": 49.4392191, + "lon": 8.4002324 + }, + "34117": { + "lat": 51.3157257, + "lon": 9.4926363 + }, + "26670": { + "lat": 53.3040802, + "lon": 7.7711552 + }, + "59302": { + "lat": 51.8289338, + "lon": 8.1434012 + }, + "57439": { + "lat": 51.1216585, + "lon": 7.9106444 + }, + "50679": { + "lat": 50.9358894, + "lon": 6.979201 + }, + "48565": { + "lat": 52.1371446, + "lon": 7.3760905 + }, + "63322": { + "lat": 49.9778903, + "lon": 8.8030176 + }, + "48153": { + "lat": 51.9373558, + "lon": 7.630713 + }, + "12049": { + "lat": 52.4778365, + "lon": 13.4222553 + }, + "27404": { + "lat": 53.280695, + "lon": 9.2815154 + }, + "56072": { + "lat": 50.3527777, + "lon": 7.517017 + }, + "06406": { + "lat": 51.7825763, + "lon": 11.7556064 + }, + "68542": { + "lat": 49.5158462, + "lon": 8.6120745 + }, + "28217": { + "lat": 53.0961375, + "lon": 8.7763043 + }, + "22335": { + "lat": 53.6313283, + "lon": 10.005055 + }, + "70195": { + "lat": 48.7856881, + "lon": 9.1243149 + }, + "32760": { + "lat": 51.9062473, + "lon": 8.8874725 + }, + "23568": { + "lat": 53.890909, + "lon": 10.7744232 + }, + "57555": { + "lat": 50.8231404, + "lon": 7.9434087 + }, + "89542": { + "lat": 48.6273822, + "lon": 10.1539367 + }, + "33813": { + "lat": 51.9492295, + 
"lon": 8.673579 + }, + "04416": { + "lat": 51.2684588, + "lon": 12.3965452 + }, + "47574": { + "lat": 51.6806879, + "lon": 6.1231731 + }, + "70176": { + "lat": 48.7777939, + "lon": 9.1620434 + }, + "51570": { + "lat": 50.7904205, + "lon": 7.5827514 + }, + "79111": { + "lat": 47.9952724, + "lon": 7.7809313 + }, + "70191": { + "lat": 48.7987327, + "lon": 9.1891082 + }, + "83673": { + "lat": 47.7194673, + "lon": 11.4121069 + }, + "52477": { + "lat": 50.8678838, + "lon": 6.1763183 + }, + "90768": { + "lat": 49.4935888, + "lon": 10.9216565 + }, + "41836": { + "lat": 51.0384886, + "lon": 6.2442831 + }, + "65239": { + "lat": 50.0276093, + "lon": 8.3582881 + }, + "69221": { + "lat": 49.4516551, + "lon": 8.6742993 + }, + "10965": { + "lat": 52.4868877, + "lon": 13.3865743 + }, + "04275": { + "lat": 51.3195745, + "lon": 12.3699042 + }, + "81479": { + "lat": 48.0760967, + "lon": 11.5215568 + }, + "52156": { + "lat": 50.5530635, + "lon": 6.2630994 + }, + "66887": { + "lat": 49.5708609, + "lon": 7.5126376 + }, + "33775": { + "lat": 52.0421486, + "lon": 8.1741646 + }, + "14478": { + "lat": 52.3665195, + "lon": 13.0947401 + }, + "46325": { + "lat": 51.8533164, + "lon": 6.8293691 + }, + "80637": { + "lat": 48.1594366, + "lon": 11.5366132 + }, + "59755": { + "lat": 51.4569728, + "lon": 7.9839003 + }, + "91578": { + "lat": 49.2957346, + "lon": 10.4213529 + }, + "26835": { + "lat": 53.302177, + "lon": 7.6133537 + }, + "49565": { + "lat": 52.4199148, + "lon": 8.0118594 + }, + "41748": { + "lat": 51.2466738, + "lon": 6.4367107 + }, + "23556": { + "lat": 53.8736733, + "lon": 10.627504 + }, + "13403": { + "lat": 52.5751043, + "lon": 13.3186742 + }, + "33699": { + "lat": 51.9890201, + "lon": 8.6259807 + }, + "49074": { + "lat": 52.2760322, + "lon": 8.0520849 + }, + "20259": { + "lat": 53.571374, + "lon": 9.9589336 + }, + "86179": { + "lat": 48.2950044, + "lon": 10.9017479 + }, + "76865": { + "lat": 49.1501434, + "lon": 8.1445643 + }, + "39175": { + "lat": 52.1511831, + "lon": 11.7569815 + 
}, + "04157": { + "lat": 51.3738253, + "lon": 12.3678529 + }, + "14052": { + "lat": 52.5142533, + "lon": 13.2587723 + }, + "86529": { + "lat": 48.5787122, + "lon": 11.2314376 + }, + "61169": { + "lat": 50.3268008, + "lon": 8.7369123 + }, + "24594": { + "lat": 54.0960833, + "lon": 9.6683364 + }, + "66130": { + "lat": 49.1987978, + "lon": 7.0603721 + }, + "12203": { + "lat": 52.4428381, + "lon": 13.3113628 + }, + "42719": { + "lat": 51.1900844, + "lon": 7.048417 + }, + "71032": { + "lat": 48.6817876, + "lon": 9.0480546 + }, + "31737": { + "lat": 52.1601068, + "lon": 9.0932974 + }, + "23566": { + "lat": 53.8718244, + "lon": 10.7363623 + }, + "85276": { + "lat": 48.5434775, + "lon": 11.4838556 + }, + "33824": { + "lat": 52.0793486, + "lon": 8.4217865 + }, + "97440": { + "lat": 49.9862294, + "lon": 10.0955939 + }, + "12526": { + "lat": 52.4004327, + "lon": 13.5662666 + }, + "28844": { + "lat": 52.9967094, + "lon": 8.859078 + }, + "48151": { + "lat": 51.939695, + "lon": 7.6078585 + }, + "26188": { + "lat": 53.1162202, + "lon": 8.0196762 + }, + "75217": { + "lat": 48.8711492, + "lon": 8.6066886 + }, + "01157": { + "lat": 51.0684686, + "lon": 13.6646468 + }, + "09212": { + "lat": 50.8728729, + "lon": 12.7133477 + }, + "77770": { + "lat": 48.4857086, + "lon": 8.0306699 + }, + "14550": { + "lat": 52.4189427, + "lon": 12.770724 + }, + "25774": { + "lat": 54.3135379, + "lon": 9.0083313 + }, + "41061": { + "lat": 51.1943408, + "lon": 6.433353 + }, + "42281": { + "lat": 51.2859753, + "lon": 7.1947824 + }, + "41464": { + "lat": 51.1901268, + "lon": 6.6681998 + }, + "53557": { + "lat": 50.5288429, + "lon": 7.3372336 + }, + "42655": { + "lat": 51.1658371, + "lon": 7.0583627 + }, + "45889": { + "lat": 51.5369896, + "lon": 7.1106775 + }, + "22089": { + "lat": 53.5675844, + "lon": 10.0477763 + }, + "66740": { + "lat": 49.3114303, + "lon": 6.7271048 + }, + "30165": { + "lat": 52.4007589, + "lon": 9.7201547 + }, + "26817": { + "lat": 53.1255126, + "lon": 7.5531302 + }, + "26388": { + 
"lat": 53.5880645, + "lon": 8.0827844 + }, + "10435": { + "lat": 52.5371985, + "lon": 13.4103257 + }, + "77948": { + "lat": 48.3811043, + "lon": 7.8795944 + }, + "20357": { + "lat": 53.5642527, + "lon": 9.9678986 + }, + "76437": { + "lat": 48.8672308, + "lon": 8.1843707 + }, + "70180": { + "lat": 48.7627075, + "lon": 9.173756 + }, + "29525": { + "lat": 52.9602539, + "lon": 10.5841215 + }, + "22609": { + "lat": 53.5580877, + "lon": 9.8507315 + }, + "22769": { + "lat": 53.5666313, + "lon": 9.9453671 + }, + "49170": { + "lat": 52.1966896, + "lon": 7.9617524 + }, + "06179": { + "lat": 51.4439668, + "lon": 11.8428613 + }, + "76646": { + "lat": 49.1048939, + "lon": 8.6011546 + }, + "66640": { + "lat": 49.5154697, + "lon": 7.1642596 + }, + "35037": { + "lat": 50.8019802, + "lon": 8.7534373 + }, + "17033": { + "lat": 53.5316255, + "lon": 13.259116 + }, + "57562": { + "lat": 50.7803241, + "lon": 7.9418665 + }, + "47475": { + "lat": 51.5200643, + "lon": 6.5331459 + }, + "25358": { + "lat": 53.8214677, + "lon": 9.6181853 + }, + "91242": { + "lat": 49.5003308, + "lon": 11.3409464 + }, + "22337": { + "lat": 53.6228371, + "lon": 10.0541373 + }, + "09125": { + "lat": 50.787209, + "lon": 12.9429497 + }, + "74532": { + "lat": 49.1729333, + "lon": 9.9223018 + }, + "86199": { + "lat": 48.3227064, + "lon": 10.8439401 + }, + "14469": { + "lat": 52.4230779, + "lon": 13.0375312 + }, + "41541": { + "lat": 51.1350698, + "lon": 6.8232105 + }, + "57258": { + "lat": 50.8894272, + "lon": 7.8850991 + }, + "31785": { + "lat": 52.1054558, + "lon": 9.3625582 + }, + "52379": { + "lat": 50.7959349, + "lon": 6.3625594 + }, + "53562": { + "lat": 50.5853146, + "lon": 7.3608497 + }, + "22047": { + "lat": 53.5891876, + "lon": 10.095758 + }, + "50737": { + "lat": 50.9940548, + "lon": 6.9283471 + }, + "08451": { + "lat": 50.8190657, + "lon": 12.3796393 + }, + "31275": { + "lat": 52.3768215, + "lon": 10.0183462 + }, + "87760": { + "lat": 47.9420271, + "lon": 10.2388364 + }, + "38100": { + "lat": 52.2622379, 
+ "lon": 10.5235223 + }, + "17291": { + "lat": 53.2867057, + "lon": 13.9075293 + }, + "18055": { + "lat": 54.0859441, + "lon": 12.1622629 + }, + "34513": { + "lat": 51.2455919, + "lon": 9.0361229 + }, + "71229": { + "lat": 48.7897001, + "lon": 9.0007802 + }, + "40599": { + "lat": 51.1796944, + "lon": 6.8725057 + }, + "45894": { + "lat": 51.5803692, + "lon": 7.0544666 + }, + "41460": { + "lat": 51.2074219, + "lon": 6.708413 + }, + "71522": { + "lat": 48.9474784, + "lon": 9.4280913 + }, + "32257": { + "lat": 52.2106441, + "lon": 8.562226 + }, + "56479": { + "lat": 50.6196216, + "lon": 8.0283948 + }, + "46119": { + "lat": 51.5224185, + "lon": 6.8799162 + }, + "25348": { + "lat": 53.7855628, + "lon": 9.4463377 + }, + "42659": { + "lat": 51.1438175, + "lon": 7.1141132 + }, + "57462": { + "lat": 51.0383244, + "lon": 7.8816773 + }, + "10781": { + "lat": 52.4935864, + "lon": 13.3530545 + }, + "99974": { + "lat": 51.2392332, + "lon": 10.486337 + }, + "33165": { + "lat": 51.6151313, + "lon": 8.8884995 + }, + "86551": { + "lat": 48.4493709, + "lon": 11.1074497 + }, + "31789": { + "lat": 52.0953668, + "lon": 9.4085359 + }, + "78628": { + "lat": 48.1701427, + "lon": 8.6574411 + }, + "65719": { + "lat": 50.0940795, + "lon": 8.4220047 + }, + "48429": { + "lat": 52.2871136, + "lon": 7.4589654 + }, + "47804": { + "lat": 51.3190214, + "lon": 6.5305185 + }, + "20257": { + "lat": 53.5751346, + "lon": 9.9453964 + }, + "76694": { + "lat": 49.1610701, + "lon": 8.5861814 + }, + "25336": { + "lat": 53.7313007, + "lon": 9.6569799 + }, + "90537": { + "lat": 49.3810222, + "lon": 11.2182963 + }, + "82166": { + "lat": 48.1221494, + "lon": 11.4359327 + }, + "19348": { + "lat": 53.075548, + "lon": 11.8212016 + }, + "68642": { + "lat": 49.6437056, + "lon": 8.4685664 + }, + "77723": { + "lat": 48.4044327, + "lon": 8.0355024 + }, + "49809": { + "lat": 52.5270769, + "lon": 7.3403526 + }, + "72401": { + "lat": 48.3656772, + "lon": 8.7940997 + }, + "88046": { + "lat": 47.6596599, + "lon": 9.5041895 + 
}, + "65474": { + "lat": 49.9865493, + "lon": 8.359341 + }, + "20457": { + "lat": 53.5335376, + "lon": 9.9806284 + } +} \ No newline at end of file diff --git a/curl_debug.sh b/curl_debug.sh new file mode 100644 index 0000000..80d780f --- /dev/null +++ b/curl_debug.sh @@ -0,0 +1,13 @@ +curl http://localhost:5000/api/health +curl http://localhost:5000/api/health +curl http://localhost:5000/api/health +curl http://localhost:5000/api/health + +curl -X POST http://localhost:5000/api/search \ + -H "Content-Type: application/json" \ + -d '{ + "search_term": "Fahrrad", + "min_price": 300, + "max_price": 900, + "max_pages": 1 + }' \ No newline at end of file diff --git a/helper.py b/helper.py deleted file mode 100644 index 9dd7c8a..0000000 --- a/helper.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Author: Hendrik Schutter, mail@hendrikschutter.com - Date of creation: 2025/11/24 - Date of last modification: 2025/11/24 -""" -import random - - -def get_random_user_agent(): - uastrings = [ - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25", - "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10", - "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403", - "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0", - ] - - return random.choice(uastrings) + str(random.randrange(255)) diff --git a/search_kleinanzeigen.py b/search_kleinanzeigen.py deleted file 
mode 100644 index cbb378c..0000000 --- a/search_kleinanzeigen.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Author: Hendrik Schutter, mail@hendrikschutter.com -Date of creation: 2025/11/24 -Date of last modification: 2025/11/24 -""" - -from bs4 import BeautifulSoup -from datetime import datetime -from tinydb import TinyDB, Query -import urllib3 -import sys -import helper -from tqdm import tqdm - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - - -def make_soup(url): - user_agent = {"user-agent": helper.get_random_user_agent()} - # print(user_agent) - http = urllib3.PoolManager(10, headers=user_agent) - r = http.request("GET", url) - return BeautifulSoup(r.data, "lxml") - - -def search(search_term, max_pages, min_price, max_price): - # Sorted by newest listing - # No Geo distance - # Only private sellers - # Only offerings - - base_url = "https://www.kleinanzeigen.de" - - found_listings = set() - - for page_counter in range(1, max_pages + 1): - listing_url = ( - base_url - + "/s-anbieter:privat/anzeige:angebote/preis:" - + str(min_price) - + ":" - + str(max_price) - + "/seite:" - + str(page_counter) - + "/" - + search_term.replace(" ", "-") - + "/k0" - ) - - print("Web Page: ", listing_url) - soup = make_soup(listing_url) - results = soup.find_all("li", class_="ad-listitem fully-clickable-card") - # print(len(results)) - - if len(results) <= 0: - break - - for result in results: - try: - # print(result) - listing_url = result.a["href"] - # print("url: " + base_url + listing_url) - found_listings.add(base_url + listing_url) - except (AttributeError, KeyError) as ex: - pass - - return found_listings - - -def scrape_listing(url): - # print("Web Page: ", url) - soup = make_soup(url) - - listing = { - "title": soup.find("h1", class_="boxedarticle--title").text.strip(), - "price": int( - soup.find("h2", class_="boxedarticle--price").text.strip().split(" ")[0] - ), - "id": int( - soup.find("ul", 
class_="flexlist text-light-800") - .find_all("li", recursive=False)[1] - .text - ), - "zip_code": int( - soup.find_all("div", class_="boxedarticle--details--full")[0] - .find("span", id="viewad-locality") - .text.strip() - .split(" ")[0] - ), - "address": soup.find_all("div", class_="boxedarticle--details--full")[0] - .find("span", id="viewad-locality") - .text.strip() - .partition(" ")[2], - "dateadded": datetime.strptime( - soup.find_all("div", class_="boxedarticle--details--full")[1].span.text, - "%d.%m.%Y", - ), - "first_image": soup.find("div", class_="galleryimage-element current").img[ - "src" - ], - "url": url, - } - return listing - - -if __name__ == "__main__": - found_listings = search("Gravelbike", max_pages=5, min_price=300, max_price=900) - print(found_listings) - print(len(found_listings)) - - # url = "https://www.kleinanzeigen.de/s-anzeige/abus-bordo-6000k-hochwertiges-faltschloss-110-cm/3255935217-217-3407" - # listing = scrape_listing(url) - # print(listing) - # exit() - -for url in tqdm(found_listings, desc="Scraping listings"): - try: - listing = scrape_listing(url) - # print(listing) - except Exception as e: - print("An error occurred:", e) - print("URL: " + url) diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..5332a23 --- /dev/null +++ b/web/index.html @@ -0,0 +1,725 @@ + + + + + + Kleinanzeigen Map Search + + + + + +
+ + +
+
+
No results
+ +
+
Scraping listings...
+
+
+
+
+
+ +
+ + +
+
+
+
+ +
+
+
+
+
+ + + + \ No newline at end of file