filter description

This commit is contained in:
2025-11-28 22:34:43 +01:00
parent b23725e2a3
commit 943a147420
5 changed files with 150 additions and 2 deletions

View File

@ -9,6 +9,7 @@ import os
import uuid
import threading
import random
import re
from kleinanzeigen_scrape import KleinanzeigenScraper
@ -128,6 +129,26 @@ def prefetch_listings_thread(session_id):
print(f"Prefetch complete for session {session_id}")
def filter_listing_search_description(description_term, description):
    """Return True when every search term occurs in the listing description.

    ``description_term`` holds one or more terms separated by semicolons.
    Terms and description are normalized the same way before comparison:
    the text is case-folded and every character that is not a letter or a
    digit (whitespace, punctuation, underscores) is removed, so the term
    "top zustand" matches the description "Top-Zustand".

    Args:
        description_term: Semicolon-separated search terms. ``None`` is
            treated as an empty string.
        description: Listing description text. ``None`` is treated as an
            empty string.

    Returns:
        True if every non-empty normalized term is a substring of the
        normalized description (vacuously True when there are no such
        terms), otherwise False.
    """
    # For str patterns \W is Unicode-aware, so umlauts and other non-ASCII
    # letters survive normalization instead of being silently dropped
    # (an ASCII-only class would turn e.g. "kueche" with an umlaut into
    # "kche" and produce false matches).
    strip_non_alnum = re.compile(r"[\W_]+")

    def normalize(text):
        # casefold() is a more aggressive lower() (e.g. sharp s -> "ss").
        return strip_non_alnum.sub("", text.casefold())

    text_clean = normalize(description or "")

    for term in (description_term or "").split(";"):
        term_clean = normalize(term)
        # Empty terms (e.g. from a trailing ";") impose no constraint.
        if term_clean and term_clean not in text_clean:
            return False

    # All terms were found.
    return True
@app.route("/api/search", methods=["POST"])
def api_search():
"""API endpoint for searching listings - returns count and starts prefetch"""
@ -141,6 +162,7 @@ def api_search():
num_listings = data.get("num_listings", 25)
min_price = data.get("min_price", 0)
max_price = data.get("max_price", 1000000000)
power_search_description = data.get("search_term_desc", "")
if not search_term:
return jsonify({"error": "Search term is required"}), 400
@ -163,6 +185,7 @@ def api_search():
# Store session with creation timestamp
scrape_sessions[session_id] = {
"urls": listing_urls,
"power_search_desc": power_search_description,
"total": len(listing_urls),
"scraped": 0,
"listings": [],
@ -207,6 +230,11 @@ def api_scrape(session_id):
if len(session["listings"]) > 0:
listing = session["listings"].pop(0)
if session["power_search_desc"]:
if not filter_listing_search_description(
session["power_search_desc"], listing["desc"]
):
listing = None
else:
listing = None