web frontend and flask backend
This commit is contained in:
@ -1,15 +1,11 @@
|
||||
# kleinanzeigen-boosted
|
||||
|
||||
scrape kleinanzeigen.de for listings and allow filtering
|
||||
***WIP***
|
||||
|
||||
## Requirements
|
||||
|
||||
```
|
||||
pip install tinydb tqdm beautifulsoup4
|
||||
pip install flask flask-cors beautifulsoup4 lxml urllib3 requests
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Scrape listings
|
||||
Scrape listings matching a search term (price-filtered, private sellers only) with `python search_kleinanzeigen.py`
|
||||
|
||||
|
||||
369
backend/scrape_proxy.py
Normal file
369
backend/scrape_proxy.py
Normal file
@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Flask API Server for Kleinanzeigen Scraper
|
||||
Author: Hendrik Schutter
|
||||
Date: 2025/11/24
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
import urllib3
|
||||
import random
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
# ZIP code cache file
|
||||
CACHE_FILE = "zip_cache.json"
|
||||
zip_cache = {}
|
||||
|
||||
# Active scrape sessions
|
||||
scrape_sessions = {}
|
||||
|
||||
SESSION_TIMEOUT = 300 # seconds
|
||||
|
||||
def cleanup_old_sessions():
    """Drop scrape sessions older than SESSION_TIMEOUT seconds.

    Returns:
        int: number of sessions that were removed.
    """
    now = time.time()
    # Collect expired IDs first: deleting while iterating a dict is unsafe.
    expired = [
        sid
        for sid, sess in scrape_sessions.items()
        if now - sess.get("created_at", now) > SESSION_TIMEOUT
    ]
    for sid in expired:
        del scrape_sessions[sid]
        print(f"Cleaned up old session: {sid}")
    return len(expired)
|
||||
|
||||
|
||||
def get_random_user_agent():
    """Pick one modern desktop browser user-agent string at random."""
    candidates = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    )
    return random.choice(candidates)
|
||||
|
||||
|
||||
def make_soup(url):
    """Fetch *url* with a randomized user agent and parse it with lxml.

    Args:
        url: absolute URL to download.

    Returns:
        BeautifulSoup document for the fetched page.
    """
    user_agent = {"user-agent": get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    # Timeout added: previously missing, so a stalled remote host could
    # hang the API worker (and its scrape session) indefinitely.
    r = http.request("GET", url, timeout=urllib3.Timeout(total=15.0))
    return BeautifulSoup(r.data, "lxml")
|
||||
|
||||
|
||||
def geocode_zip(zip_code):
    """Resolve a German ZIP code to ``{"lat": float, "lon": float}``.

    Results are memoized in the module-level ``zip_cache`` dict, which is
    persisted to CACHE_FILE after every new successful lookup.

    Args:
        zip_code: ZIP code (str or int); normalized to str for cache keys.

    Returns:
        dict with "lat"/"lon" floats, or None when the code cannot be
        geocoded (unknown ZIP, network error, malformed response).
    """
    zip_code = str(zip_code)

    # Fast path: already resolved earlier (possibly in a previous run).
    if zip_code in zip_cache:
        return zip_cache[zip_code]

    # Call Nominatim API
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "postalcode": zip_code,
        "country": "Germany",
        "format": "json",
        "limit": 1,
    }

    try:
        # timeout added: requests.get() without one can block forever and
        # stall the whole scrape session on a single bad lookup.
        response = requests.get(
            url,
            params=params,
            headers={"user-agent": get_random_user_agent()},
            timeout=10,
        )
        data = response.json()

        if data:
            coords = {"lat": float(data[0]["lat"]), "lon": float(data[0]["lon"])}
            zip_cache[zip_code] = coords

            # Persist the cache so later runs skip the network round-trip.
            with open(CACHE_FILE, "w", encoding="utf-8") as f:
                json.dump(zip_cache, f, ensure_ascii=False, indent=2)

            time.sleep(1)  # Nominatim usage policy: max ~1 request/second
            return coords
    except Exception as e:
        # Best-effort: a geocoding failure must not abort the scrape.
        print(f"Geocoding error for {zip_code}: {e}")

    return None
|
||||
|
||||
|
||||
def search_listings(search_term, max_pages, min_price, max_price):
    """Search kleinanzeigen.de and return unique listing URLs.

    Filters encoded in the URL path: private sellers only, offers only,
    price between *min_price* and *max_price*. Pagination stops early at
    the first empty result page or on a fetch error.

    Args:
        search_term: free-text query; spaces become hyphens in the URL.
        max_pages: maximum number of result pages to visit.
        min_price / max_price: inclusive price bounds in EUR.

    Returns:
        list[str]: absolute listing URLs (de-duplicated).
    """
    base_url = "https://www.kleinanzeigen.de"
    found_listings = set()

    for page_counter in range(1, max_pages + 1):
        # Fix: this page URL used to be stored in `listing_url`, which the
        # inner loop then shadowed with each card's href — confusing and
        # error-prone; the two are now distinct names.
        page_url = (
            base_url
            + "/s-anbieter:privat/anzeige:angebote/preis:"
            + str(min_price)
            + ":"
            + str(max_price)
            + "/seite:"
            + str(page_counter)
            + "/"
            + search_term.replace(" ", "-")
            + "/k0"
        )

        print(f"Scraping page {page_counter}: {page_url}")

        try:
            soup = make_soup(page_url)
            results = soup.find_all("li", class_="ad-listitem fully-clickable-card")

            if len(results) <= 0:
                # Empty page means we ran past the last result page.
                break

            for result in results:
                try:
                    href = result.a["href"]
                    found_listings.add(base_url + href)
                except (AttributeError, KeyError):
                    # Card without an anchor/href (e.g. ad slot) — skip it.
                    pass
        except Exception as e:
            print(f"Error scraping page {page_counter}: {e}")
            break

    return list(found_listings)
|
||||
|
||||
|
||||
def scrape_listing(url):
    """Scrape a single listing page into a dict, or None on failure.

    Returns None when the page has no title (listing deleted/expired) or
    no ZIP code (the listing cannot be placed on the map). All parsing is
    defensive: missing optional fields fall back to defaults instead of
    aborting.
    """
    try:
        soup = make_soup(url)

        title = soup.find("h1", class_="boxedarticle--title")
        if not title:
            # Deleted/expired listings render without the title element.
            return None
        title = title.text.strip()

        price_elem = soup.find("h2", class_="boxedarticle--price")
        price = 0
        if price_elem:
            # e.g. "1.250 € VB" -> "1.250" -> 1250
            price_text = price_elem.text.strip().split(" ")[0]
            try:
                price = int(price_text.replace(".", "").replace(",", ""))
            except ValueError:
                # Fix: was a bare `except:` (also caught SystemExit etc.).
                # Non-numeric price such as "VB" or "Zu verschenken".
                price = 0

        flexlist = soup.find("ul", class_="flexlist text-light-800")
        listing_id = 0
        if flexlist:
            flex_items = flexlist.find_all("li", recursive=False)
            if len(flex_items) > 1:
                try:
                    listing_id = int(flex_items[1].text.strip())
                except ValueError:
                    pass

        # Locality format appears to be "<zip> <town...>" — split once.
        locality = soup.find("span", id="viewad-locality")
        zip_code = None
        address = ""
        if locality:
            locality_text = locality.text.strip()
            parts = locality_text.split(" ", 1)
            if parts:
                zip_code = parts[0]
            if len(parts) > 1:
                address = parts[1]

        date_added = None
        details_divs = soup.find_all("div", class_="boxedarticle--details--full")
        if len(details_divs) > 1:
            date_span = details_divs[1].find("span")
            if date_span:
                try:
                    date_added = datetime.strptime(date_span.text, "%d.%m.%Y")
                except ValueError:
                    pass

        first_image = None
        img_elem = soup.find("div", class_="galleryimage-element current")
        if img_elem:
            img = img_elem.find("img")
            if img and img.get("src"):
                first_image = img["src"]

        if not zip_code:
            # Without a ZIP code the listing cannot be geocoded/mapped.
            return None

        listing = {
            "title": title,
            "price": price,
            "id": listing_id,
            "zip_code": zip_code,
            "address": address,
            "date_added": date_added.isoformat() if date_added else None,
            "image": first_image,
            "url": url,
        }

        # Add coordinates (best effort; listing is kept even without them).
        coords = geocode_zip(zip_code)
        if coords and isinstance(coords, dict):
            listing["lat"] = coords.get("lat")
            listing["lon"] = coords.get("lon")

        return listing

    except Exception as e:
        print(f"Error scraping listing {url}: {e}")
        return None
|
||||
|
||||
|
||||
@app.route("/api/search", methods=["POST"])
|
||||
def api_search():
|
||||
"""API endpoint for searching listings - returns only count and URLs"""
|
||||
data = request.json
|
||||
|
||||
# Cleanup old sessions before creating new one
|
||||
cleanup_old_sessions()
|
||||
|
||||
search_term = data.get("search_term", "")
|
||||
max_pages = data.get("max_pages", 1)
|
||||
min_price = data.get("min_price", 0)
|
||||
max_price = data.get("max_price", 10000)
|
||||
|
||||
if not search_term:
|
||||
return jsonify({"error": "Search term is required"}), 400
|
||||
|
||||
try:
|
||||
# Search for listing URLs only
|
||||
listing_urls = search_listings(search_term, max_pages, min_price, max_price)
|
||||
|
||||
# Create session ID
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
# Store session with creation timestamp
|
||||
scrape_sessions[session_id] = {
|
||||
"urls": listing_urls,
|
||||
"total": len(listing_urls),
|
||||
"scraped": 0,
|
||||
"listings": [],
|
||||
"cancelled": False,
|
||||
"created_at": time.time(),
|
||||
}
|
||||
|
||||
return jsonify({"session_id": session_id, "total": len(listing_urls)})
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
@app.route("/api/scrape/<session_id>", methods=["GET"])
|
||||
def api_scrape(session_id):
|
||||
"""API endpoint for scraping next listing in session"""
|
||||
# Cleanup old sessions on each request
|
||||
cleanup_old_sessions()
|
||||
|
||||
if session_id not in scrape_sessions:
|
||||
return jsonify({"error": "Invalid session ID"}), 404
|
||||
|
||||
session = scrape_sessions[session_id]
|
||||
|
||||
if session["cancelled"]:
|
||||
return jsonify({"cancelled": True}), 200
|
||||
|
||||
if session["scraped"] >= session["total"]:
|
||||
return jsonify({"complete": True, "listing": None})
|
||||
|
||||
# Scrape next listing
|
||||
url = session["urls"][session["scraped"]]
|
||||
listing = scrape_listing(url)
|
||||
|
||||
if listing:
|
||||
session["listings"].append(listing)
|
||||
|
||||
session["scraped"] += 1
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"complete": session["scraped"] >= session["total"],
|
||||
"listing": listing,
|
||||
"progress": {"current": session["scraped"], "total": session["total"]},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.route("/api/scrape/<session_id>/cancel", methods=["POST"])
|
||||
def api_cancel_scrape(session_id):
|
||||
"""API endpoint to cancel scraping session"""
|
||||
cleanup_old_sessions()
|
||||
|
||||
if session_id not in scrape_sessions:
|
||||
return jsonify({"error": "Invalid session ID"}), 404
|
||||
|
||||
scrape_sessions[session_id]["cancelled"] = True
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"cancelled": True,
|
||||
"listings": scrape_sessions[session_id]["listings"],
|
||||
"total_scraped": len(scrape_sessions[session_id]["listings"]),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.route("/api/scrape/<session_id>/results", methods=["GET"])
|
||||
def api_get_results(session_id):
|
||||
"""API endpoint to get all scraped results"""
|
||||
cleanup_old_sessions()
|
||||
|
||||
if session_id not in scrape_sessions:
|
||||
return jsonify({"error": "Invalid session ID"}), 404
|
||||
|
||||
session = scrape_sessions[session_id]
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"listings": session["listings"],
|
||||
"total": len(session["listings"]),
|
||||
"progress": {"current": session["scraped"], "total": session["total"]},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.route("/api/health", methods=["GET"])
|
||||
def health():
|
||||
"""Health check endpoint"""
|
||||
cleanup_old_sessions()
|
||||
return jsonify(
|
||||
{
|
||||
"status": "ok",
|
||||
"cache_size": len(zip_cache),
|
||||
"active_sessions": len(scrape_sessions),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Starting Kleinanzeigen Scraper API Server...")
|
||||
|
||||
# Load cache on startup
|
||||
if os.path.exists(CACHE_FILE):
|
||||
with open(CACHE_FILE, "r", encoding="utf-8") as f:
|
||||
zip_cache = json.load(f)
|
||||
|
||||
print(f"Loaded {len(zip_cache)} ZIP codes from cache")
|
||||
print("ZIP code cache loaded with", len(zip_cache), "entries")
|
||||
app.run(debug=True, host="0.0.0.0", port=5000)
|
||||
1374
backend/zip_cache.json
Normal file
1374
backend/zip_cache.json
Normal file
File diff suppressed because it is too large
Load Diff
13
curl_debug.sh
Normal file
13
curl_debug.sh
Normal file
@ -0,0 +1,13 @@
|
||||
# Manual smoke tests against the local Flask API server.

# Health endpoint (repeated calls left in from debugging).
curl http://localhost:5000/api/health
curl http://localhost:5000/api/health
curl http://localhost:5000/api/health
curl http://localhost:5000/api/health

# Start a search session; the response contains session_id and total.
curl -X POST http://localhost:5000/api/search \
  -H "Content-Type: application/json" \
  -d '{
    "search_term": "Fahrrad",
    "min_price": 300,
    "max_price": 900,
    "max_pages": 1
  }'
|
||||
21
helper.py
21
helper.py
@ -1,21 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||
Date of creation: 2025/11/24
|
||||
Date of last modification: 2025/11/24
|
||||
"""
|
||||
import random
|
||||
|
||||
|
||||
def get_random_user_agent():
    """Return a random browser user-agent string with a numeric suffix.

    A random 0-254 number is appended so consecutive calls rarely produce
    the identical string (presumably to vary the bot signature — behavior
    kept exactly as before).
    """
    candidates = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
    )
    suffix = random.randrange(255)
    return random.choice(candidates) + str(suffix)
|
||||
@ -1,123 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||
Date of creation: 2025/11/24
|
||||
Date of last modification: 2025/11/24
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
from tinydb import TinyDB, Query
|
||||
import urllib3
|
||||
import sys
|
||||
import helper
|
||||
from tqdm import tqdm
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
def make_soup(url):
    """Download *url* and return it parsed as an lxml BeautifulSoup tree."""
    headers = {"user-agent": helper.get_random_user_agent()}
    pool = urllib3.PoolManager(10, headers=headers)
    response = pool.request("GET", url)
    return BeautifulSoup(response.data, "lxml")
|
||||
|
||||
|
||||
def search(search_term, max_pages, min_price, max_price):
    """Collect listing URLs for *search_term* across up to *max_pages* pages.

    Returns a set of absolute listing URLs. Stops early at the first empty
    result page.
    """
    # URL path encodes the search filters:
    # Sorted by newest listing
    # No Geo distance
    # Only private sellers
    # Only offerings

    base_url = "https://www.kleinanzeigen.de"

    found_listings = set()

    for page_counter in range(1, max_pages + 1):
        listing_url = (
            base_url
            + "/s-anbieter:privat/anzeige:angebote/preis:"
            + str(min_price)
            + ":"
            + str(max_price)
            + "/seite:"
            + str(page_counter)
            + "/"
            + search_term.replace(" ", "-")
            + "/k0"
        )

        print("Web Page: ", listing_url)
        soup = make_soup(listing_url)
        results = soup.find_all("li", class_="ad-listitem fully-clickable-card")
        # print(len(results))

        # No cards on this page: we ran past the last result page.
        if len(results) <= 0:
            break

        for result in results:
            try:
                # print(result)
                # NOTE: this reuses (shadows) the outer `listing_url` name
                # for the card href — works, but easy to misread.
                listing_url = result.a["href"]
                # print("url: " + base_url + listing_url)
                found_listings.add(base_url + listing_url)
            except (AttributeError, KeyError) as ex:
                # Card without an anchor or href attribute — skip it.
                pass

    return found_listings
|
||||
|
||||
|
||||
def scrape_listing(url):
    """Scrape one listing page into a dict of its fields.

    WARNING: every field access below chains `.find(...)` results without
    None checks, so any missing element raises AttributeError (and missing
    numbers raise ValueError). Callers are expected to wrap this in
    try/except — see the __main__ loop.
    """
    # print("Web Page: ", url)
    soup = make_soup(url)

    listing = {
        "title": soup.find("h1", class_="boxedarticle--title").text.strip(),
        # First token of e.g. "1.250 € VB"; raises ValueError for "VB" etc.
        "price": int(
            soup.find("h2", class_="boxedarticle--price").text.strip().split(" ")[0]
        ),
        # Second <li> of the flexlist holds the numeric listing id.
        "id": int(
            soup.find("ul", class_="flexlist text-light-800")
            .find_all("li", recursive=False)[1]
            .text
        ),
        # Locality text looks like "<zip> <town...>"; first token is the ZIP.
        "zip_code": int(
            soup.find_all("div", class_="boxedarticle--details--full")[0]
            .find("span", id="viewad-locality")
            .text.strip()
            .split(" ")[0]
        ),
        # Everything after the first space is the town/address part.
        "address": soup.find_all("div", class_="boxedarticle--details--full")[0]
        .find("span", id="viewad-locality")
        .text.strip()
        .partition(" ")[2],
        # German date format, e.g. "24.11.2025".
        "dateadded": datetime.strptime(
            soup.find_all("div", class_="boxedarticle--details--full")[1].span.text,
            "%d.%m.%Y",
        ),
        "first_image": soup.find("div", class_="galleryimage-element current").img[
            "src"
        ],
        "url": url,
    }
    return listing
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
found_listings = search("Gravelbike", max_pages=5, min_price=300, max_price=900)
|
||||
print(found_listings)
|
||||
print(len(found_listings))
|
||||
|
||||
# url = "https://www.kleinanzeigen.de/s-anzeige/abus-bordo-6000k-hochwertiges-faltschloss-110-cm/3255935217-217-3407"
|
||||
# listing = scrape_listing(url)
|
||||
# print(listing)
|
||||
# exit()
|
||||
|
||||
for url in tqdm(found_listings, desc="Scraping listings"):
|
||||
try:
|
||||
listing = scrape_listing(url)
|
||||
# print(listing)
|
||||
except Exception as e:
|
||||
print("An error occurred:", e)
|
||||
print("URL: " + url)
|
||||
725
web/index.html
Normal file
725
web/index.html
Normal file
@ -0,0 +1,725 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Kleinanzeigen Map Search</title>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.css" />
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.js"></script>
|
||||
<style>
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
height: 100vh;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.container {
|
||||
display: grid;
|
||||
grid-template-columns: 350px 1fr;
|
||||
grid-template-rows: auto 1fr;
|
||||
height: 100vh;
|
||||
}
|
||||
|
||||
.search-bar {
|
||||
grid-column: 1 / -1;
|
||||
background: #fff;
|
||||
padding: 15px 20px;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
align-items: center;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.search-bar input, .search-bar select {
|
||||
padding: 8px 12px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.search-bar input[type="text"] {
|
||||
flex: 1;
|
||||
min-width: 200px;
|
||||
}
|
||||
|
||||
.search-bar input[type="number"] {
|
||||
width: 100px;
|
||||
}
|
||||
|
||||
.search-bar button {
|
||||
padding: 8px 20px;
|
||||
background: #0066cc;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.search-bar button:hover:not(:disabled) {
|
||||
background: #0052a3;
|
||||
}
|
||||
|
||||
.search-bar button:disabled {
|
||||
background: #ccc;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.search-bar button.cancel {
|
||||
background: #dc3545;
|
||||
}
|
||||
|
||||
.search-bar button.cancel:hover {
|
||||
background: #c82333;
|
||||
}
|
||||
|
||||
.results-panel {
|
||||
background: #f8f9fa;
|
||||
overflow-y: auto;
|
||||
border-right: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.results-header {
|
||||
background: white;
|
||||
padding: 15px 20px;
|
||||
border-bottom: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.results-count {
|
||||
font-weight: 600;
|
||||
color: #333;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.progress-info {
|
||||
background: #e3f2fd;
|
||||
padding: 12px;
|
||||
border-radius: 4px;
|
||||
margin-bottom: 10px;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.progress-info.active {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.progress-bar {
|
||||
width: 100%;
|
||||
height: 8px;
|
||||
background: #e0e0e0;
|
||||
border-radius: 4px;
|
||||
overflow: hidden;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background: #0066cc;
|
||||
width: 0%;
|
||||
transition: width 0.3s;
|
||||
}
|
||||
|
||||
.progress-text {
|
||||
font-size: 12px;
|
||||
color: #1565c0;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.eta-text {
|
||||
font-size: 11px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.sort-control {
|
||||
display: flex;
|
||||
gap: 5px;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.sort-control label {
|
||||
font-size: 13px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.sort-control select {
|
||||
padding: 5px 8px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.result-item {
|
||||
background: white;
|
||||
margin: 10px;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.result-item:hover {
|
||||
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
.result-item.selected {
|
||||
border: 2px solid #0066cc;
|
||||
}
|
||||
|
||||
.result-image {
|
||||
width: 100%;
|
||||
height: 180px;
|
||||
object-fit: cover;
|
||||
background: #e9ecef;
|
||||
}
|
||||
|
||||
.result-content {
|
||||
padding: 12px;
|
||||
}
|
||||
|
||||
.result-title {
|
||||
font-weight: 600;
|
||||
color: #333;
|
||||
margin-bottom: 8px;
|
||||
font-size: 14px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.result-price {
|
||||
color: #0066cc;
|
||||
font-weight: 700;
|
||||
font-size: 18px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.result-meta {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.result-location {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.result-date {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.map-container {
|
||||
position: relative;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
#map {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.status-bar {
|
||||
position: absolute;
|
||||
top: 10px;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
background: white;
|
||||
padding: 10px 20px;
|
||||
border-radius: 4px;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.2);
|
||||
z-index: 1000;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.status-bar.visible {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.status-bar.loading {
|
||||
background: #e3f2fd;
|
||||
color: #1565c0;
|
||||
}
|
||||
|
||||
.status-bar.success {
|
||||
background: #e8f5e9;
|
||||
color: #2e7d32;
|
||||
}
|
||||
|
||||
.status-bar.error {
|
||||
background: #ffebee;
|
||||
color: #c62828;
|
||||
}
|
||||
|
||||
.no-results {
|
||||
text-align: center;
|
||||
padding: 40px 20px;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.loading-spinner {
|
||||
display: inline-block;
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
border: 2px solid #1565c0;
|
||||
border-radius: 50%;
|
||||
border-top-color: transparent;
|
||||
animation: spin 0.8s linear infinite;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
@media (max-width: 1024px) {
|
||||
.container {
|
||||
grid-template-columns: 300px 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.container {
|
||||
grid-template-columns: 1fr;
|
||||
grid-template-rows: auto auto 1fr;
|
||||
}
|
||||
|
||||
.results-panel {
|
||||
max-height: 300px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="search-bar">
|
||||
<input type="text" id="searchTerm" placeholder="Search term (e.g., Cube Nuroad)" value="Fahrrad">
|
||||
<input type="number" id="minPrice" placeholder="Min €" value="300" min="0">
|
||||
<input type="number" id="maxPrice" placeholder="Max €" value="900" min="0">
|
||||
<input type="number" id="maxPages" placeholder="Pages" value="1" min="1" max="20">
|
||||
<button id="searchBtn">Search</button>
|
||||
<button id="cancelBtn" class="cancel" style="display: none;">Cancel</button>
|
||||
</div>
|
||||
|
||||
<div class="results-panel">
|
||||
<div class="results-header">
|
||||
<div class="results-count">No results</div>
|
||||
|
||||
<div id="progressInfo" class="progress-info">
|
||||
<div class="progress-text">Scraping listings...</div>
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill"></div>
|
||||
</div>
|
||||
<div class="eta-text"></div>
|
||||
</div>
|
||||
|
||||
<div class="sort-control">
|
||||
<label>Sort:</label>
|
||||
<select id="sortSelect">
|
||||
<option value="date-desc">Date (newest)</option>
|
||||
<option value="date-asc">Date (oldest)</option>
|
||||
<option value="price-asc">Price (low to high)</option>
|
||||
<option value="price-desc">Price (high to low)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div id="resultsList"></div>
|
||||
</div>
|
||||
|
||||
<div class="map-container">
|
||||
<div id="statusBar" class="status-bar"></div>
|
||||
<div id="map"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
const API_BASE_URL = 'http://localhost:5000';
|
||||
let map;
|
||||
let markers = [];
|
||||
let allListings = [];
|
||||
let selectedListingId = null;
|
||||
let currentSessionId = null;
|
||||
let scrapeStartTime = null;
|
||||
let isScrapingActive = false;
|
||||
|
||||
// Initialize map
|
||||
// Create the Leaflet map centered on Germany (zoom 6) with OSM tiles.
function initMap() {
    map = L.map('map').setView([51.1657, 10.4515], 6);
    L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
        attribution: '© OpenStreetMap contributors',
        maxZoom: 19
    }).addTo(map);
}
|
||||
|
||||
// Show status message
|
||||
// Show a status banner over the map. type: 'loading' | 'success' | 'error'.
function showStatus(message, type = 'loading') {
    const statusBar = document.getElementById('statusBar');
    statusBar.className = `status-bar visible ${type}`;

    if (type === 'loading') {
        // Loading state prepends a spinner; note this uses innerHTML, so
        // `message` must come from our own code, never from page data.
        statusBar.innerHTML = `<span class="loading-spinner"></span>${message}`;
    } else {
        statusBar.textContent = message;
    }

    // Success/error banners auto-dismiss after 3s; loading stays until
    // replaced by the next status update.
    if (type !== 'loading') {
        setTimeout(() => {
            statusBar.classList.remove('visible');
        }, 3000);
    }
}
|
||||
|
||||
// Update progress
|
||||
// Refresh the progress bar, "x/y" counter, and ETA estimate.
function updateProgress(current, total) {
    const progressInfo = document.getElementById('progressInfo');
    const progressFill = progressInfo.querySelector('.progress-fill');
    const progressText = progressInfo.querySelector('.progress-text');
    const etaText = progressInfo.querySelector('.eta-text');

    if (total === 0) {
        // Nothing to scrape: hide the whole progress box.
        progressInfo.classList.remove('active');
        return;
    }

    progressInfo.classList.add('active');
    const percentage = (current / total) * 100;
    progressFill.style.width = percentage + '%';
    progressText.textContent = `Scraping listings: ${current}/${total}`;

    // ETA = average seconds per listing so far × listings remaining.
    if (scrapeStartTime && current > 0) {
        const elapsed = (Date.now() - scrapeStartTime) / 1000;
        const avgTimePerListing = elapsed / current;
        const remaining = total - current;
        const etaSeconds = Math.round(avgTimePerListing * remaining);

        const minutes = Math.floor(etaSeconds / 60);
        const seconds = etaSeconds % 60;

        if (minutes > 0) {
            etaText.textContent = `ETA: ~${minutes}m ${seconds}s`;
        } else {
            etaText.textContent = `ETA: ~${seconds}s`;
        }
    }
}
|
||||
|
||||
// Clear all markers from map
|
||||
// Remove every listing marker from the map and reset the registry.
function clearMarkers() {
    for (const m of markers) {
        map.removeLayer(m);
    }
    markers = [];
}
|
||||
|
||||
// Add marker to map
|
||||
// Place a Leaflet marker for one listing and wire up its popup.
function addMarker(listing) {
    // Listings without geocoded coordinates cannot be shown on the map.
    if (!listing.lat || !listing.lon) return;

    const marker = L.marker([listing.lat, listing.lon]).addTo(map);

    const imageHtml = listing.image
        ? `<img src="${listing.image}" style="width: 100%; max-height: 150px; object-fit: cover; margin: 8px 0;" alt="${listing.title}">`
        : '';

    // NOTE(review): popup HTML interpolates scraped title/address text —
    // effectively trusting kleinanzeigen.de's content; confirm acceptable.
    const popupContent = `
        <div style="min-width: 200px;">
            <strong style="font-size: 14px;">${listing.title}</strong><br>
            ${imageHtml}
            <span style="color: #0066cc; font-weight: bold; font-size: 16px;">€${listing.price}</span><br>
            <span style="color: #666; font-size: 12px;">${listing.address}</span><br>
            <a href="${listing.url}" target="_blank" style="color: #0066cc; text-decoration: none; font-weight: 600;">Open in new tab →</a>
        </div>
    `;

    marker.bindPopup(popupContent);

    // Clicking a marker also highlights the matching card in the list.
    marker.on('click', () => {
        selectedListingId = listing.id;
        highlightSelectedListing();
    });

    markers.push(marker);
}
|
||||
|
||||
// Highlight selected listing in results list
|
||||
// Mark the currently selected listing card and scroll it into view.
function highlightSelectedListing() {
    for (const item of document.querySelectorAll('.result-item')) {
        const isSelected = parseInt(item.dataset.id) === selectedListingId;
        item.classList.toggle('selected', isSelected);
        if (isSelected) {
            item.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
        }
    }
}
|
||||
|
||||
// Format date
|
||||
// Render an ISO date string in German date format; fall back for missing dates.
function formatDate(dateString) {
    if (!dateString) return 'Unknown date';
    return new Date(dateString).toLocaleDateString('de-DE');
}
|
||||
|
||||
// Render results list
|
||||
// Rebuild the results side panel from `listings` and attach click handlers.
function renderResults(listings) {
    const resultsList = document.getElementById('resultsList');
    const resultsCount = document.querySelector('.results-count');

    if (listings.length === 0) {
        resultsList.innerHTML = '<div class="no-results">No listings found</div>';
        resultsCount.textContent = 'No results';
        return;
    }

    resultsCount.textContent = `${listings.length} result${listings.length !== 1 ? 's' : ''}`;

    // NOTE(review): card HTML interpolates scraped titles/addresses via
    // innerHTML — trusts the scraped site's content; confirm acceptable.
    resultsList.innerHTML = listings.map(listing => `
        <div class="result-item" data-id="${listing.id}">
            ${listing.image ? `<img src="${listing.image}" class="result-image" alt="${listing.title}">` : '<div class="result-image"></div>'}
            <div class="result-content">
                <div class="result-title">${listing.title}</div>
                <div class="result-price">€${listing.price}</div>
                <div class="result-meta">
                    <div class="result-location">
                        <span>📍</span>
                        <span>${listing.address || listing.zip_code}</span>
                    </div>
                    <div class="result-date">${formatDate(listing.date_added)}</div>
                </div>
            </div>
        </div>
    `).join('');

    // Clicking a card selects it, pans the map to it, and opens the listing.
    document.querySelectorAll('.result-item').forEach(item => {
        item.addEventListener('click', () => {
            const id = parseInt(item.dataset.id);
            const listing = listings.find(l => l.id === id);
            if (listing) {
                selectedListingId = id;
                highlightSelectedListing();

                if (listing.lat && listing.lon) {
                    map.setView([listing.lat, listing.lon], 13);
                    // Exact float equality is safe here: marker coords were
                    // created from this same listing's lat/lon values.
                    const marker = markers.find(m =>
                        m.getLatLng().lat === listing.lat &&
                        m.getLatLng().lng === listing.lon
                    );
                    if (marker) {
                        marker.openPopup();
                    }
                }

                // Open listing in new tab
                window.open(listing.url, '_blank');
            }
        });
    });
}
|
||||
|
||||
// Sort listings
|
||||
// Return a NEW array of listings ordered by the given criterion.
// The input array is never mutated; an unknown sortBy returns the copy as-is.
function sortListings(listings, sortBy) {
    const byPrice = (a, b) => a.price - b.price;
    const byDate = (a, b) => new Date(a.date_added || 0) - new Date(b.date_added || 0);

    // Dispatch table: criterion name -> comparator.
    const comparators = {
        'price-asc': byPrice,
        'price-desc': (a, b) => byPrice(b, a),
        'date-asc': byDate,
        'date-desc': (a, b) => byDate(b, a)
    };

    const sorted = [...listings];
    const compare = comparators[sortBy];
    if (compare) {
        sorted.sort(compare);
    }
    return sorted;
}
|
||||
|
||||
// Scrape next listing
|
||||
// Fetch one listing from the active scrape session and fold it into the UI.
// Returns true while the polling loop should continue, false once scraping
// is finished, cancelled, or an error occurred.
async function scrapeNextListing() {
    const active = currentSessionId && isScrapingActive;
    if (!active) {
        console.log('Scraping stopped: session or active flag cleared');
        return false;
    }

    try {
        const response = await fetch(`${API_BASE_URL}/api/scrape/${currentSessionId}`);
        const data = await response.json();

        if (data.cancelled) {
            console.log('Scraping cancelled by backend');
            return false;
        }

        if (data.listing) {
            allListings.push(data.listing);
            addMarker(data.listing);

            // Re-sort and re-render with the new listing included.
            const sortBy = document.getElementById('sortSelect').value;
            renderResults(sortListings(allListings, sortBy));

            // Keep every marker in view.
            if (markers.length > 0) {
                map.fitBounds(L.featureGroup(markers).getBounds().pad(0.1));
            }
        }

        updateProgress(data.progress.current, data.progress.total);

        if (!data.complete) {
            return true;
        }

        // Finalize: re-enable the form, hide cancel, report the total.
        console.log('Scraping complete, finalizing...');
        isScrapingActive = false;
        document.getElementById('searchBtn').disabled = false;
        document.getElementById('cancelBtn').style.display = 'none';
        updateProgress(0, 0);
        showStatus(`Completed! Scraped ${allListings.length} listings`, 'success');

        // Session id, listings and markers are intentionally kept around.
        console.log('Final listings count:', allListings.length);
        return false;

    } catch (error) {
        console.error('Scrape error:', error);
        isScrapingActive = false;
        document.getElementById('searchBtn').disabled = false;
        document.getElementById('cancelBtn').style.display = 'none';
        showStatus('Error occurred during scraping', 'error');
        return false;
    }
}
|
||||
|
||||
// Start scraping loop
|
||||
// Poll the backend for listings one at a time until scraping finishes,
// is cancelled, or errors out; short pause between requests.
async function startScrapingLoop() {
    while (isScrapingActive && currentSessionId) {
        if (!(await scrapeNextListing())) {
            break;
        }
        // Brief delay so the backend isn't hammered.
        await new Promise(done => setTimeout(done, 100));
    }
}
|
||||
|
||||
// Search listings
|
||||
// Read the search form, start a scrape session on the backend,
// then kick off the incremental scraping loop.
async function searchListings() {
    const searchTerm = document.getElementById('searchTerm').value.trim();
    if (!searchTerm) {
        showStatus('Please enter a search term', 'error');
        return;
    }

    const minPrice = parseInt(document.getElementById('minPrice').value) || 0;
    const maxPrice = parseInt(document.getElementById('maxPrice').value) || 10000;
    const maxPages = parseInt(document.getElementById('maxPages').value) || 5;

    // Reset any previous results before starting a new search.
    document.getElementById('searchBtn').disabled = true;
    clearMarkers();
    allListings = [];
    selectedListingId = null;
    document.getElementById('resultsList').innerHTML = '';

    showStatus('Searching for listings...', 'loading');

    try {
        const payload = {
            search_term: searchTerm,
            min_price: minPrice,
            max_price: maxPrice,
            max_pages: maxPages
        };
        const response = await fetch(`${API_BASE_URL}/api/search`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(payload)
        });

        if (!response.ok) {
            throw new Error('API request failed');
        }

        const data = await response.json();
        currentSessionId = data.session_id;

        if (data.total === 0) {
            showStatus('No listings found', 'error');
            document.getElementById('searchBtn').disabled = false;
            return;
        }

        showStatus(`Found ${data.total} listings. Starting scrape...`, 'success');
        document.getElementById('cancelBtn').style.display = 'inline-block';

        // Begin the incremental scraping loop (not awaited on purpose).
        isScrapingActive = true;
        scrapeStartTime = Date.now();
        updateProgress(0, data.total);
        startScrapingLoop();

    } catch (error) {
        console.error('Search error:', error);
        showStatus('Error: Could not connect to API server', 'error');
        document.getElementById('searchBtn').disabled = false;
    }
}
|
||||
|
||||
// Cancel scraping
|
||||
// Ask the backend to cancel the active scrape session, then reset the UI
// while keeping whatever listings were already collected.
async function cancelScraping() {
    if (!currentSessionId) {
        return;
    }

    try {
        const cancelUrl = `${API_BASE_URL}/api/scrape/${currentSessionId}/cancel`;
        await fetch(cancelUrl, { method: 'POST' });

        isScrapingActive = false;
        document.getElementById('searchBtn').disabled = false;
        document.getElementById('cancelBtn').style.display = 'none';
        updateProgress(0, 0);
        showStatus(`Cancelled. Showing ${allListings.length} scraped listings`, 'error');

    } catch (error) {
        console.error('Cancel error:', error);
    }
}
|
||||
|
||||
// Event listeners
|
||||
// Wire up UI event handlers.
document.getElementById('searchBtn').addEventListener('click', searchListings);
document.getElementById('cancelBtn').addEventListener('click', cancelScraping);

// 'keydown' replaces the deprecated 'keypress' event; the Enter check
// behaves identically.
document.getElementById('searchTerm').addEventListener('keydown', (e) => {
    if (e.key === 'Enter') searchListings();
});

// Re-sort the already-scraped listings when the sort order changes.
document.getElementById('sortSelect').addEventListener('change', (e) => {
    if (allListings.length > 0) {
        const sortedListings = sortListings(allListings, e.target.value);
        renderResults(sortedListings);
    }
});

// Initialize
initMap();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user