web frontend and flask backend

This commit is contained in:
2025-11-25 16:41:52 +01:00
parent da702fb4e6
commit dd36618802
7 changed files with 2483 additions and 150 deletions

View File

@ -1,15 +1,11 @@
# kleinanzeigen-boosted
scrape kleinanzeigen.de for listings and allow filtering
***WIP***
## Requirements
```
pip install tinydb tqdm beautifulsoup4
pip install flask flask-cors beautifulsoup4 lxml urllib3 requests
```
## Usage
### Run the app
Start the backend API with `python backend/scrape_proxy.py`, then open `web/index.html` in a browser to search listings and view them on a map

369
backend/scrape_proxy.py Normal file
View File

@ -0,0 +1,369 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Flask API Server for Kleinanzeigen Scraper
Author: Hendrik Schutter
Date: 2025/11/24
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
from bs4 import BeautifulSoup
from datetime import datetime
import urllib3
import random
import requests
import time
import json
import os
import uuid
app = Flask(__name__)
CORS(app)  # the static web frontend is served from a different origin
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# ZIP code cache file — persisted geocoding results (see geocode_zip)
CACHE_FILE = "zip_cache.json"
zip_cache = {}

# Active scrape sessions, keyed by UUID string (created in /api/search)
scrape_sessions = {}
SESSION_TIMEOUT = 300  # seconds a session may live before cleanup removes it
def cleanup_old_sessions():
    """Drop scrape sessions older than SESSION_TIMEOUT; return how many were removed."""
    now = time.time()
    expired = [
        sid
        for sid, sess in scrape_sessions.items()
        if now - sess.get("created_at", now) > SESSION_TIMEOUT
    ]
    for sid in expired:
        del scrape_sessions[sid]
        print(f"Cleaned up old session: {sid}")
    return len(expired)
def get_random_user_agent():
    """Return one modern browser User-Agent string, chosen uniformly at random."""
    uastrings = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    )
    return random.choice(uastrings)
def make_soup(url):
    """GET *url* with a randomized User-Agent and parse the body with lxml."""
    headers = {"user-agent": get_random_user_agent()}
    pool = urllib3.PoolManager(10, headers=headers)
    response = pool.request("GET", url)
    return BeautifulSoup(response.data, "lxml")
def geocode_zip(zip_code):
    """Geocode a German ZIP code via the Nominatim API, with an on-disk cache.

    Returns a dict {"lat": float, "lon": float}, or None when the ZIP code
    cannot be resolved or the request fails.
    """
    zip_code = str(zip_code)

    # Cache hit: no network round-trip and no rate-limit sleep.
    if zip_code in zip_cache:
        return zip_cache[zip_code]

    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "postalcode": zip_code,
        "country": "Germany",
        "format": "json",
        "limit": 1,
    }
    try:
        response = requests.get(
            url,
            params=params,
            headers={"user-agent": get_random_user_agent()},
            timeout=10,  # a stalled request must not hang the whole scrape
        )
        response.raise_for_status()  # treat HTTP errors as failures, not empty results
        data = response.json()
        if data:
            coords = {"lat": float(data[0]["lat"]), "lon": float(data[0]["lon"])}
            zip_cache[zip_code] = coords
            # Persist the cache so restarts do not re-query Nominatim.
            with open(CACHE_FILE, "w", encoding="utf-8") as f:
                json.dump(zip_cache, f, ensure_ascii=False, indent=2)
            time.sleep(1)  # Respect Nominatim's max-1-request/second policy
            return coords
    except (requests.RequestException, ValueError, KeyError, OSError) as e:
        print(f"Geocoding error for {zip_code}: {e}")
    return None
def search_listings(search_term, max_pages, min_price, max_price):
    """Search kleinanzeigen.de and return a list of unique listing URLs.

    Filters: private sellers only, offers only, price between *min_price* and
    *max_price* (EUR). Stops early when a result page comes back empty or a
    page fetch fails.
    """
    base_url = "https://www.kleinanzeigen.de"
    found_listings = set()
    slug = search_term.replace(" ", "-")
    for page_counter in range(1, max_pages + 1):
        page_url = (
            f"{base_url}/s-anbieter:privat/anzeige:angebote"
            f"/preis:{min_price}:{max_price}"
            f"/seite:{page_counter}/{slug}/k0"
        )
        print(f"Scraping page {page_counter}: {page_url}")
        try:
            soup = make_soup(page_url)
            results = soup.find_all("li", class_="ad-listitem fully-clickable-card")
            if not results:  # past the last result page
                break
            for result in results:
                try:
                    # Relative href of the listing card; ad slots may lack it.
                    # TypeError covers result.a being None.
                    href = result.a["href"]
                    found_listings.add(base_url + href)
                except (AttributeError, KeyError, TypeError):
                    pass
        except Exception as e:
            print(f"Error scraping page {page_counter}: {e}")
            break
    return list(found_listings)
def scrape_listing(url):
    """Scrape a single listing page into a dict, or return None on failure.

    Listings without a title or ZIP code are unusable for the map view and
    are skipped (None).
    """
    try:
        soup = make_soup(url)

        title = soup.find("h1", class_="boxedarticle--title")
        if not title:
            return None
        title = title.text.strip()

        # Price text like "1.234 €" -> 1234; unparsable prices fall back to 0.
        price_elem = soup.find("h2", class_="boxedarticle--price")
        price = 0
        if price_elem:
            price_text = price_elem.text.strip().split(" ")[0]
            try:
                price = int(price_text.replace(".", "").replace(",", ""))
            except ValueError:  # e.g. "VB" / "Zu verschenken"
                price = 0

        # The numeric listing id is the second entry of the meta flexlist.
        flexlist = soup.find("ul", class_="flexlist text-light-800")
        listing_id = 0
        if flexlist:
            flex_items = flexlist.find_all("li", recursive=False)
            if len(flex_items) > 1:
                try:
                    listing_id = int(flex_items[1].text.strip())
                except ValueError:
                    pass

        # Locality text is "<zip> <rest of address>".
        locality = soup.find("span", id="viewad-locality")
        zip_code = None
        address = ""
        if locality:
            locality_text = locality.text.strip()
            parts = locality_text.split(" ", 1)
            if parts:
                zip_code = parts[0]
            if len(parts) > 1:
                address = parts[1]

        # Creation date ("dd.mm.yyyy") sits in the second details block.
        date_added = None
        details_divs = soup.find_all("div", class_="boxedarticle--details--full")
        if len(details_divs) > 1:
            date_span = details_divs[1].find("span")
            if date_span:
                try:
                    date_added = datetime.strptime(date_span.text, "%d.%m.%Y")
                except ValueError:
                    pass

        first_image = None
        img_elem = soup.find("div", class_="galleryimage-element current")
        if img_elem:
            img = img_elem.find("img")
            if img and img.get("src"):
                first_image = img["src"]

        # Without a ZIP code the listing cannot be geocoded onto the map.
        if not zip_code:
            return None

        listing = {
            "title": title,
            "price": price,
            "id": listing_id,
            "zip_code": zip_code,
            "address": address,
            "date_added": date_added.isoformat() if date_added else None,
            "image": first_image,
            "url": url,
        }

        # Attach map coordinates resolved from the ZIP code (often cached).
        coords = geocode_zip(zip_code)
        if coords and isinstance(coords, dict):
            listing["lat"] = coords.get("lat")
            listing["lon"] = coords.get("lon")
        return listing
    except Exception as e:
        print(f"Error scraping listing {url}: {e}")
        return None
@app.route("/api/search", methods=["POST"])
def api_search():
    """Create a scrape session: collect listing URLs, return session id + count."""
    # silent=True: a missing or invalid JSON body yields None instead of an
    # unhandled 400/415 abort from Flask; we then answer with our own 400.
    data = request.get_json(silent=True) or {}

    # Cleanup old sessions before creating a new one
    cleanup_old_sessions()

    search_term = data.get("search_term", "")
    max_pages = data.get("max_pages", 1)
    min_price = data.get("min_price", 0)
    max_price = data.get("max_price", 10000)

    if not search_term:
        return jsonify({"error": "Search term is required"}), 400

    try:
        # Only the URLs are collected here; details are scraped one at a time
        # via /api/scrape/<session_id> so the frontend can show progress.
        listing_urls = search_listings(search_term, max_pages, min_price, max_price)

        session_id = str(uuid.uuid4())
        scrape_sessions[session_id] = {
            "urls": listing_urls,
            "total": len(listing_urls),
            "scraped": 0,           # index of the next URL to scrape
            "listings": [],         # successfully scraped listing dicts
            "cancelled": False,
            "created_at": time.time(),  # consumed by cleanup_old_sessions
        }
        return jsonify({"session_id": session_id, "total": len(listing_urls)})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route("/api/scrape/<session_id>", methods=["GET"])
def api_scrape(session_id):
    """Scrape the next pending listing of *session_id* and report progress."""
    # Expire stale sessions on every poll.
    cleanup_old_sessions()

    session = scrape_sessions.get(session_id)
    if session is None:
        return jsonify({"error": "Invalid session ID"}), 404

    if session["cancelled"]:
        return jsonify({"cancelled": True}), 200

    if session["scraped"] >= session["total"]:
        return jsonify({"complete": True, "listing": None})

    # Scrape the next URL; a failed scrape returns None but still advances
    # the cursor, so the session always terminates.
    listing = scrape_listing(session["urls"][session["scraped"]])
    if listing:
        session["listings"].append(listing)
    session["scraped"] += 1

    payload = {
        "complete": session["scraped"] >= session["total"],
        "listing": listing,
        "progress": {"current": session["scraped"], "total": session["total"]},
    }
    return jsonify(payload)
@app.route("/api/scrape/<session_id>/cancel", methods=["POST"])
def api_cancel_scrape(session_id):
    """Mark a scrape session as cancelled; return what was collected so far."""
    cleanup_old_sessions()

    session = scrape_sessions.get(session_id)
    if session is None:
        return jsonify({"error": "Invalid session ID"}), 404

    session["cancelled"] = True
    collected = session["listings"]
    return jsonify(
        {
            "cancelled": True,
            "listings": collected,
            "total_scraped": len(collected),
        }
    )
@app.route("/api/scrape/<session_id>/results", methods=["GET"])
def api_get_results(session_id):
    """Return every listing scraped so far for *session_id*, plus progress."""
    cleanup_old_sessions()

    session = scrape_sessions.get(session_id)
    if session is None:
        return jsonify({"error": "Invalid session ID"}), 404

    listings = session["listings"]
    return jsonify(
        {
            "listings": listings,
            "total": len(listings),
            "progress": {"current": session["scraped"], "total": session["total"]},
        }
    )
@app.route("/api/health", methods=["GET"])
def health():
    """Liveness probe: report geocode-cache size and active session count."""
    cleanup_old_sessions()  # keep the reported session count honest
    status = {
        "status": "ok",
        "cache_size": len(zip_cache),
        "active_sessions": len(scrape_sessions),
    }
    return jsonify(status)
if __name__ == "__main__":
    print("Starting Kleinanzeigen Scraper API Server...")

    # Warm the geocoding cache from disk so restarts avoid re-querying Nominatim.
    if os.path.exists(CACHE_FILE):
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            zip_cache = json.load(f)
        print(f"Loaded {len(zip_cache)} ZIP codes from cache")

    # NOTE(review): debug=True combined with host="0.0.0.0" exposes the
    # Werkzeug debugger to the network — disable debug for any non-local use.
    app.run(debug=True, host="0.0.0.0", port=5000)

1374
backend/zip_cache.json Normal file

File diff suppressed because it is too large Load Diff

13
curl_debug.sh Normal file
View File

@ -0,0 +1,13 @@
# Smoke-test script for the scrape_proxy API (expects the server on :5000).

# Hit the health endpoint a few times (each call also triggers session cleanup).
curl http://localhost:5000/api/health
curl http://localhost:5000/api/health
curl http://localhost:5000/api/health
curl http://localhost:5000/api/health

# Start a search session; the response contains a session_id for /api/scrape.
curl -X POST http://localhost:5000/api/search \
  -H "Content-Type: application/json" \
  -d '{
  "search_term": "Fahrrad",
  "min_price": 300,
  "max_price": 900,
  "max_pages": 1
}'

View File

@ -1,21 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2025/11/24
Date of last modification: 2025/11/24
"""
import random
def get_random_user_agent():
    """Return a random browser User-Agent with a random numeric suffix (0-254)."""
    uastrings = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/600.1.25 (KHTML, like Gecko) Version/8.0 Safari/600.1.25",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/7.1 Safari/537.85.10",
        "Mozilla/5.0 (Linux; Android 10.1; TV BOX) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/58.2.2878.53403",
        "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:33.0) Gecko/20100101 Firefox/33.0",
    )
    # The numeric suffix slightly varies the string between calls.
    suffix = str(random.randrange(255))
    return random.choice(uastrings) + suffix

View File

@ -1,123 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2025/11/24
Date of last modification: 2025/11/24
"""
from bs4 import BeautifulSoup
from datetime import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
import helper
from tqdm import tqdm
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def make_soup(url):
    """Fetch *url* with a random User-Agent and return a lxml BeautifulSoup."""
    user_agent = {"user-agent": helper.get_random_user_agent()}
    # print(user_agent)
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, "lxml")
def search(search_term, max_pages, min_price, max_price):
    """Search kleinanzeigen.de and return a set of absolute listing URLs.

    Sorted by newest listing, no geo-distance filter, private sellers only,
    offers only. Stops early once a result page comes back empty.
    """
    base_url = "https://www.kleinanzeigen.de"
    found_listings = set()
    for page_counter in range(1, max_pages + 1):
        listing_url = (
            base_url
            + "/s-anbieter:privat/anzeige:angebote/preis:"
            + str(min_price)
            + ":"
            + str(max_price)
            + "/seite:"
            + str(page_counter)
            + "/"
            + search_term.replace(" ", "-")
            + "/k0"
        )
        print("Web Page: ", listing_url)
        soup = make_soup(listing_url)
        results = soup.find_all("li", class_="ad-listitem fully-clickable-card")
        # print(len(results))
        if len(results) <= 0:
            break
        for result in results:
            try:
                # print(result)
                # NOTE: rebinds the page URL above; harmless because it is
                # recomputed at the top of the next page iteration.
                listing_url = result.a["href"]
                # print("url: " + base_url + listing_url)
                found_listings.add(base_url + listing_url)
            except (AttributeError, KeyError) as ex:
                # Cards without an anchor/href (e.g. ad slots) are skipped.
                pass
    return found_listings
def scrape_listing(url):
    """Scrape one listing page into a dict.

    Raises (AttributeError/IndexError/ValueError) when an expected element
    is missing or unparsable — the caller catches and skips such listings.
    """
    # print("Web Page: ", url)
    soup = make_soup(url)
    listing = {
        "title": soup.find("h1", class_="boxedarticle--title").text.strip(),
        # NOTE(review): int() fails on prices with thousands separators
        # ("1.234") — the caller's try/except absorbs this.
        "price": int(
            soup.find("h2", class_="boxedarticle--price").text.strip().split(" ")[0]
        ),
        # Second flexlist entry holds the numeric listing id.
        "id": int(
            soup.find("ul", class_="flexlist text-light-800")
            .find_all("li", recursive=False)[1]
            .text
        ),
        # Locality text is "<zip> <address>".
        "zip_code": int(
            soup.find_all("div", class_="boxedarticle--details--full")[0]
            .find("span", id="viewad-locality")
            .text.strip()
            .split(" ")[0]
        ),
        "address": soup.find_all("div", class_="boxedarticle--details--full")[0]
        .find("span", id="viewad-locality")
        .text.strip()
        .partition(" ")[2],
        # Creation date "dd.mm.yyyy" from the second details block.
        "dateadded": datetime.strptime(
            soup.find_all("div", class_="boxedarticle--details--full")[1].span.text,
            "%d.%m.%Y",
        ),
        "first_image": soup.find("div", class_="galleryimage-element current").img[
            "src"
        ],
        "url": url,
    }
    return listing
if __name__ == "__main__":
    # Collect listing URLs for a sample search, then scrape each one.
    found_listings = search("Gravelbike", max_pages=5, min_price=300, max_price=900)
    print(found_listings)
    print(len(found_listings))
    # url = "https://www.kleinanzeigen.de/s-anzeige/abus-bordo-6000k-hochwertiges-faltschloss-110-cm/3255935217-217-3407"
    # listing = scrape_listing(url)
    # print(listing)
    # exit()
    for url in tqdm(found_listings, desc="Scraping listings"):
        try:
            listing = scrape_listing(url)
            # print(listing)
        except Exception as e:
            # scrape_listing raises on missing/unparsable fields; log and continue.
            print("An error occurred:", e)
            print("URL: " + url)

725
web/index.html Normal file
View File

@ -0,0 +1,725 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kleinanzeigen Map Search</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.css" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.9.4/leaflet.min.js"></script>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
height: 100vh;
overflow: hidden;
}
.container {
display: grid;
grid-template-columns: 350px 1fr;
grid-template-rows: auto 1fr;
height: 100vh;
}
.search-bar {
grid-column: 1 / -1;
background: #fff;
padding: 15px 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
display: flex;
gap: 10px;
align-items: center;
flex-wrap: wrap;
}
.search-bar input, .search-bar select {
padding: 8px 12px;
border: 1px solid #ddd;
border-radius: 4px;
font-size: 14px;
}
.search-bar input[type="text"] {
flex: 1;
min-width: 200px;
}
.search-bar input[type="number"] {
width: 100px;
}
.search-bar button {
padding: 8px 20px;
background: #0066cc;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-weight: 600;
}
.search-bar button:hover:not(:disabled) {
background: #0052a3;
}
.search-bar button:disabled {
background: #ccc;
cursor: not-allowed;
}
.search-bar button.cancel {
background: #dc3545;
}
.search-bar button.cancel:hover {
background: #c82333;
}
.results-panel {
background: #f8f9fa;
overflow-y: auto;
border-right: 1px solid #ddd;
}
.results-header {
background: white;
padding: 15px 20px;
border-bottom: 1px solid #ddd;
}
.results-count {
font-weight: 600;
color: #333;
margin-bottom: 10px;
}
.progress-info {
background: #e3f2fd;
padding: 12px;
border-radius: 4px;
margin-bottom: 10px;
display: none;
}
.progress-info.active {
display: block;
}
.progress-bar {
width: 100%;
height: 8px;
background: #e0e0e0;
border-radius: 4px;
overflow: hidden;
margin-top: 8px;
}
.progress-fill {
height: 100%;
background: #0066cc;
width: 0%;
transition: width 0.3s;
}
.progress-text {
font-size: 12px;
color: #1565c0;
margin-bottom: 4px;
}
.eta-text {
font-size: 11px;
color: #666;
}
.sort-control {
display: flex;
gap: 5px;
align-items: center;
}
.sort-control label {
font-size: 13px;
color: #666;
}
.sort-control select {
padding: 5px 8px;
border: 1px solid #ddd;
border-radius: 4px;
font-size: 13px;
}
.result-item {
background: white;
margin: 10px;
border-radius: 8px;
overflow: hidden;
cursor: pointer;
transition: all 0.2s;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.result-item:hover {
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
transform: translateY(-2px);
}
.result-item.selected {
border: 2px solid #0066cc;
}
.result-image {
width: 100%;
height: 180px;
object-fit: cover;
background: #e9ecef;
}
.result-content {
padding: 12px;
}
.result-title {
font-weight: 600;
color: #333;
margin-bottom: 8px;
font-size: 14px;
line-height: 1.4;
}
.result-price {
color: #0066cc;
font-weight: 700;
font-size: 18px;
margin-bottom: 8px;
}
.result-meta {
display: flex;
justify-content: space-between;
font-size: 12px;
color: #666;
}
.result-location {
display: flex;
align-items: center;
gap: 4px;
}
.result-date {
font-style: italic;
}
.map-container {
position: relative;
height: 100%;
}
#map {
width: 100%;
height: 100%;
}
.status-bar {
position: absolute;
top: 10px;
left: 50%;
transform: translateX(-50%);
background: white;
padding: 10px 20px;
border-radius: 4px;
box-shadow: 0 2px 8px rgba(0,0,0,0.2);
z-index: 1000;
display: none;
}
.status-bar.visible {
display: block;
}
.status-bar.loading {
background: #e3f2fd;
color: #1565c0;
}
.status-bar.success {
background: #e8f5e9;
color: #2e7d32;
}
.status-bar.error {
background: #ffebee;
color: #c62828;
}
.no-results {
text-align: center;
padding: 40px 20px;
color: #666;
}
.loading-spinner {
display: inline-block;
width: 14px;
height: 14px;
border: 2px solid #1565c0;
border-radius: 50%;
border-top-color: transparent;
animation: spin 0.8s linear infinite;
margin-right: 8px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
@media (max-width: 1024px) {
.container {
grid-template-columns: 300px 1fr;
}
}
@media (max-width: 768px) {
.container {
grid-template-columns: 1fr;
grid-template-rows: auto auto 1fr;
}
.results-panel {
max-height: 300px;
}
}
</style>
</head>
<body>
<div class="container">
<div class="search-bar">
<input type="text" id="searchTerm" placeholder="Search term (e.g., Cube Nuroad)" value="Fahrrad">
<input type="number" id="minPrice" placeholder="Min €" value="300" min="0">
<input type="number" id="maxPrice" placeholder="Max €" value="900" min="0">
<input type="number" id="maxPages" placeholder="Pages" value="1" min="1" max="20">
<button id="searchBtn">Search</button>
<button id="cancelBtn" class="cancel" style="display: none;">Cancel</button>
</div>
<div class="results-panel">
<div class="results-header">
<div class="results-count">No results</div>
<div id="progressInfo" class="progress-info">
<div class="progress-text">Scraping listings...</div>
<div class="progress-bar">
<div class="progress-fill"></div>
</div>
<div class="eta-text"></div>
</div>
<div class="sort-control">
<label>Sort:</label>
<select id="sortSelect">
<option value="date-desc">Date (newest)</option>
<option value="date-asc">Date (oldest)</option>
<option value="price-asc">Price (low to high)</option>
<option value="price-desc">Price (high to low)</option>
</select>
</div>
</div>
<div id="resultsList"></div>
</div>
<div class="map-container">
<div id="statusBar" class="status-bar"></div>
<div id="map"></div>
</div>
</div>
<script>
const API_BASE_URL = 'http://localhost:5000';

let map;                      // Leaflet map instance (set by initMap)
let markers = [];             // Leaflet markers currently on the map
let allListings = [];         // listings received from the backend so far
let selectedListingId = null; // id of the listing highlighted in the list
let currentSessionId = null;  // backend scrape session id
let scrapeStartTime = null;   // ms timestamp used for the ETA estimate
let isScrapingActive = false; // true while the polling loop should run
// Initialize the Leaflet map: centered on Germany, OSM tile layer.
function initMap() {
    map = L.map('map').setView([51.1657, 10.4515], 6);
    L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
        attribution: '© OpenStreetMap contributors',
        maxZoom: 19
    }).addTo(map);
}
// Show a status message over the map; non-loading types auto-hide.
function showStatus(message, type = 'loading') {
    const statusBar = document.getElementById('statusBar');
    statusBar.className = `status-bar visible ${type}`;
    if (type === 'loading') {
        // Loading state gets a spinner prefix.
        statusBar.innerHTML = `<span class="loading-spinner"></span>${message}`;
    } else {
        statusBar.textContent = message;
    }
    if (type !== 'loading') {
        // Success/error messages disappear after 3 seconds.
        setTimeout(() => {
            statusBar.classList.remove('visible');
        }, 3000);
    }
}
// Update the progress bar, counter text and ETA estimate.
// Calling with total === 0 hides the progress panel entirely.
function updateProgress(current, total) {
    const progressInfo = document.getElementById('progressInfo');
    const progressFill = progressInfo.querySelector('.progress-fill');
    const progressText = progressInfo.querySelector('.progress-text');
    const etaText = progressInfo.querySelector('.eta-text');
    if (total === 0) {
        progressInfo.classList.remove('active');
        return;
    }
    progressInfo.classList.add('active');
    const percentage = (current / total) * 100;
    progressFill.style.width = percentage + '%';
    progressText.textContent = `Scraping listings: ${current}/${total}`;
    // ETA = average time per scraped listing * remaining listings.
    if (scrapeStartTime && current > 0) {
        const elapsed = (Date.now() - scrapeStartTime) / 1000;
        const avgTimePerListing = elapsed / current;
        const remaining = total - current;
        const etaSeconds = Math.round(avgTimePerListing * remaining);
        const minutes = Math.floor(etaSeconds / 60);
        const seconds = etaSeconds % 60;
        if (minutes > 0) {
            etaText.textContent = `ETA: ~${minutes}m ${seconds}s`;
        } else {
            etaText.textContent = `ETA: ~${seconds}s`;
        }
    }
}
// Remove every marker from the map and reset the marker registry.
function clearMarkers() {
    for (const m of markers) {
        map.removeLayer(m);
    }
    markers = [];
}
// Add a Leaflet marker with an image/price popup for *listing*.
// Listings without coordinates (geocoding failed) are skipped silently.
function addMarker(listing) {
    if (!listing.lat || !listing.lon) return;
    const marker = L.marker([listing.lat, listing.lon]).addTo(map);
    const imageHtml = listing.image
        ? `<img src="${listing.image}" style="width: 100%; max-height: 150px; object-fit: cover; margin: 8px 0;" alt="${listing.title}">`
        : '';
    const popupContent = `
        <div style="min-width: 200px;">
            <strong style="font-size: 14px;">${listing.title}</strong><br>
            ${imageHtml}
            <span style="color: #0066cc; font-weight: bold; font-size: 16px;">€${listing.price}</span><br>
            <span style="color: #666; font-size: 12px;">${listing.address}</span><br>
            <a href="${listing.url}" target="_blank" style="color: #0066cc; text-decoration: none; font-weight: 600;">Open in new tab →</a>
        </div>
    `;
    marker.bindPopup(popupContent);
    // Clicking a marker highlights the matching card in the results list.
    marker.on('click', () => {
        selectedListingId = listing.id;
        highlightSelectedListing();
    });
    markers.push(marker);
}
// Toggle the 'selected' class on result cards and scroll the match into view.
function highlightSelectedListing() {
    document.querySelectorAll('.result-item').forEach(item => {
        const itemId = parseInt(item.dataset.id);
        if (itemId === selectedListingId) {
            item.classList.add('selected');
            item.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
        } else {
            item.classList.remove('selected');
        }
    });
}
// Format date
function formatDate(dateString) {
if (!dateString) return 'Unknown date';
const date = new Date(dateString);
return date.toLocaleDateString('de-DE');
}
// Render the results list panel and attach click handlers.
// Clicking a card highlights it, pans/zooms the map, opens the matching
// marker popup and opens the listing in a new tab.
function renderResults(listings) {
    const resultsList = document.getElementById('resultsList');
    const resultsCount = document.querySelector('.results-count');
    if (listings.length === 0) {
        resultsList.innerHTML = '<div class="no-results">No listings found</div>';
        resultsCount.textContent = 'No results';
        return;
    }
    resultsCount.textContent = `${listings.length} result${listings.length !== 1 ? 's' : ''}`;
    resultsList.innerHTML = listings.map(listing => `
        <div class="result-item" data-id="${listing.id}">
            ${listing.image ? `<img src="${listing.image}" class="result-image" alt="${listing.title}">` : '<div class="result-image"></div>'}
            <div class="result-content">
                <div class="result-title">${listing.title}</div>
                <div class="result-price">€${listing.price}</div>
                <div class="result-meta">
                    <div class="result-location">
                        <span>📍</span>
                        <span>${listing.address || listing.zip_code}</span>
                    </div>
                    <div class="result-date">${formatDate(listing.date_added)}</div>
                </div>
            </div>
        </div>
    `).join('');
    // Add click handlers
    document.querySelectorAll('.result-item').forEach(item => {
        item.addEventListener('click', () => {
            const id = parseInt(item.dataset.id);
            const listing = listings.find(l => l.id === id);
            if (listing) {
                selectedListingId = id;
                highlightSelectedListing();
                if (listing.lat && listing.lon) {
                    map.setView([listing.lat, listing.lon], 13);
                    // Locate the marker at the same coordinates to open its popup.
                    const marker = markers.find(m =>
                        m.getLatLng().lat === listing.lat &&
                        m.getLatLng().lng === listing.lon
                    );
                    if (marker) {
                        marker.openPopup();
                    }
                }
                // Open listing in new tab
                window.open(listing.url, '_blank');
            }
        });
    });
}
// Sort listings
function sortListings(listings, sortBy) {
const sorted = [...listings];
switch(sortBy) {
case 'price-asc':
sorted.sort((a, b) => a.price - b.price);
break;
case 'price-desc':
sorted.sort((a, b) => b.price - a.price);
break;
case 'date-asc':
sorted.sort((a, b) => new Date(a.date_added || 0) - new Date(b.date_added || 0));
break;
case 'date-desc':
sorted.sort((a, b) => new Date(b.date_added || 0) - new Date(a.date_added || 0));
break;
}
return sorted;
}
// Poll the backend for the next scraped listing of the current session.
// Returns true when the loop should continue, false when it must stop
// (complete, cancelled, or errored).
async function scrapeNextListing() {
    if (!currentSessionId || !isScrapingActive) {
        console.log('Scraping stopped: session or active flag cleared');
        return false;
    }
    try {
        const response = await fetch(`${API_BASE_URL}/api/scrape/${currentSessionId}`);
        const data = await response.json();
        if (data.cancelled) {
            console.log('Scraping cancelled by backend');
            return false;
        }
        if (data.listing) {
            allListings.push(data.listing);
            addMarker(data.listing);
            // Re-render the list with the current sort order applied.
            const sortBy = document.getElementById('sortSelect').value;
            const sortedListings = sortListings(allListings, sortBy);
            renderResults(sortedListings);
            // Keep all markers in view as results come in.
            if (markers.length > 0) {
                const group = L.featureGroup(markers);
                map.fitBounds(group.getBounds().pad(0.1));
            }
        }
        updateProgress(data.progress.current, data.progress.total);
        if (data.complete) {
            console.log('Scraping complete, finalizing...');
            isScrapingActive = false;
            document.getElementById('searchBtn').disabled = false;
            document.getElementById('cancelBtn').style.display = 'none';
            updateProgress(0, 0);
            showStatus(`Completed! Scraped ${allListings.length} listings`, 'success');
            // DO NOT clear session ID - keep it for potential future use
            // DO NOT reset listings or markers
            console.log('Final listings count:', allListings.length);
            return false;
        }
        return true;
    } catch (error) {
        console.error('Scrape error:', error);
        isScrapingActive = false;
        document.getElementById('searchBtn').disabled = false;
        document.getElementById('cancelBtn').style.display = 'none';
        showStatus('Error occurred during scraping', 'error');
        return false;
    }
}
// Poll the backend until scraping finishes, is cancelled, or errors out.
async function startScrapingLoop() {
    while (isScrapingActive && currentSessionId) {
        const keepGoing = await scrapeNextListing();
        if (!keepGoing) break;
        // Small pause between polls to keep the UI responsive.
        await new Promise(resolve => setTimeout(resolve, 100));
    }
}
// Kick off a new search: reset UI state, create a backend scrape session
// and start the incremental polling loop.
async function searchListings() {
    const searchTerm = document.getElementById('searchTerm').value.trim();
    const minPrice = parseInt(document.getElementById('minPrice').value) || 0;
    const maxPrice = parseInt(document.getElementById('maxPrice').value) || 10000;
    const maxPages = parseInt(document.getElementById('maxPages').value) || 5;
    if (!searchTerm) {
        showStatus('Please enter a search term', 'error');
        return;
    }
    document.getElementById('searchBtn').disabled = true;
    // Clear results of any previous search.
    clearMarkers();
    allListings = [];
    selectedListingId = null;
    document.getElementById('resultsList').innerHTML = '';
    showStatus('Searching for listings...', 'loading');
    try {
        const response = await fetch(`${API_BASE_URL}/api/search`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                search_term: searchTerm,
                min_price: minPrice,
                max_price: maxPrice,
                max_pages: maxPages
            })
        });
        if (!response.ok) {
            throw new Error('API request failed');
        }
        const data = await response.json();
        currentSessionId = data.session_id;
        if (data.total === 0) {
            showStatus('No listings found', 'error');
            document.getElementById('searchBtn').disabled = false;
            return;
        }
        showStatus(`Found ${data.total} listings. Starting scrape...`, 'success');
        // Show cancel button
        document.getElementById('cancelBtn').style.display = 'inline-block';
        // Start scraping
        isScrapingActive = true;
        scrapeStartTime = Date.now();
        updateProgress(0, data.total);
        startScrapingLoop();
    } catch (error) {
        console.error('Search error:', error);
        showStatus('Error: Could not connect to API server', 'error');
        document.getElementById('searchBtn').disabled = false;
    }
}
// Ask the backend to cancel the current session and restore the UI.
async function cancelScraping() {
    if (!currentSessionId) return;
    try {
        await fetch(`${API_BASE_URL}/api/scrape/${currentSessionId}/cancel`, {
            method: 'POST'
        });
        isScrapingActive = false;
        document.getElementById('searchBtn').disabled = false;
        document.getElementById('cancelBtn').style.display = 'none';
        updateProgress(0, 0);
        // 'error' styling is reused here to make the cancellation noticeable.
        showStatus(`Cancelled. Showing ${allListings.length} scraped listings`, 'error');
    } catch (error) {
        console.error('Cancel error:', error);
    }
}
// Wire up UI events.
document.getElementById('searchBtn').addEventListener('click', searchListings);
document.getElementById('cancelBtn').addEventListener('click', cancelScraping);
// Enter in the search field triggers a search.
document.getElementById('searchTerm').addEventListener('keypress', (e) => {
    if (e.key === 'Enter') searchListings();
});
// Re-sort the already-loaded results without refetching.
document.getElementById('sortSelect').addEventListener('change', (e) => {
    if (allListings.length > 0) {
        const sortedListings = sortListings(allListings, e.target.value);
        renderResults(sortedListings);
    }
});
// Initialize
initMap();
</script>
</body>
</html>