Compare commits
2 Commits
b23725e2a3
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 7381305228 | |||
| 943a147420 |
@ -222,6 +222,11 @@ class KleinanzeigenScraper:
|
|||||||
if not zip_code:
|
if not zip_code:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
desc = None
|
||||||
|
desc_elem = soup.find("p", id="viewad-description-text")
|
||||||
|
if desc_elem:
|
||||||
|
desc = desc_elem.text.strip()
|
||||||
|
|
||||||
listing = {
|
listing = {
|
||||||
"title": title,
|
"title": title,
|
||||||
"price": price,
|
"price": price,
|
||||||
@ -231,6 +236,7 @@ class KleinanzeigenScraper:
|
|||||||
"date_added": date_added.isoformat() if date_added else None,
|
"date_added": date_added.isoformat() if date_added else None,
|
||||||
"image": first_image,
|
"image": first_image,
|
||||||
"url": url,
|
"url": url,
|
||||||
|
"desc": desc,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add coordinates
|
# Add coordinates
|
||||||
@ -275,6 +281,7 @@ def main():
|
|||||||
# Search for listings
|
# Search for listings
|
||||||
print("Step 1: Searching for listing URLs...")
|
print("Step 1: Searching for listing URLs...")
|
||||||
listing_urls = scraper.search_listings(search_term, max_pages, min_price, max_price)
|
listing_urls = scraper.search_listings(search_term, max_pages, min_price, max_price)
|
||||||
|
#listing_urls = ["https://www.kleinanzeigen.de/s-anzeige/cube-nuroad-pro-fe-2023-58-cm-l-/3226095826-217-23527"]
|
||||||
print(f"Found {len(listing_urls)} listings\n")
|
print(f"Found {len(listing_urls)} listings\n")
|
||||||
|
|
||||||
if len(listing_urls) > 0:
|
if len(listing_urls) > 0:
|
||||||
@ -294,6 +301,7 @@ def main():
|
|||||||
print(f" Date: {listing['date_added']}")
|
print(f" Date: {listing['date_added']}")
|
||||||
print(f" Coordinates: {listing.get('lat')}, {listing.get('lon')}")
|
print(f" Coordinates: {listing.get('lat')}, {listing.get('lon')}")
|
||||||
print(f" Image: {listing['image']}")
|
print(f" Image: {listing['image']}")
|
||||||
|
print(f" Description: {listing['desc']}")
|
||||||
else:
|
else:
|
||||||
print("Failed to scrape listing")
|
print("Failed to scrape listing")
|
||||||
|
|
||||||
|
|||||||
@ -9,6 +9,7 @@ import os
|
|||||||
import uuid
|
import uuid
|
||||||
import threading
|
import threading
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
|
|
||||||
from kleinanzeigen_scrape import KleinanzeigenScraper
|
from kleinanzeigen_scrape import KleinanzeigenScraper
|
||||||
|
|
||||||
@ -128,6 +129,26 @@ def prefetch_listings_thread(session_id):
|
|||||||
print(f"Prefetch complete for session {session_id}")
|
print(f"Prefetch complete for session {session_id}")
|
||||||
|
|
||||||
|
|
||||||
|
def filter_listing_search_description(description_term, description):
    """Return True when every search term occurs in the listing description.

    ``description_term`` holds one or more terms separated by semicolons.
    The terms and the description are normalized the same way (case-folded,
    with every non-alphanumeric character removed) and then compared by
    substring, so "Schutz-Blech" matches the term "schutzblech".

    Args:
        description_term: Semicolon-separated search terms. ``None`` or an
            empty string means "no filter".
        description: Description text of the listing. May be ``None`` when
            the scraper found no description element.

    Returns:
        True if all non-empty normalized terms are contained in the
        normalized description (or there are no such terms), else False.
    """

    def normalize(text):
        # A listing without a description is stored as None; treat it as
        # empty text instead of failing on ``None.lower()``.
        # ``[\W_]+`` is Unicode-aware for str patterns, so letters such as
        # ä/ö/ü are kept rather than deleted (deleting them turned "öl"
        # into "l", which matches nearly everything). ``casefold`` also
        # maps "ß" to "ss" so "strasse" finds "Straße".
        return re.sub(r"[\W_]+", "", (text or "").casefold())

    text_clean = normalize(description)

    # Split the search terms by semicolon and check each one.
    for term in (description_term or "").split(";"):
        term_clean = normalize(term)

        # Terms that are empty after normalization (e.g. from a trailing
        # semicolon) impose no constraint.
        if term_clean and term_clean not in text_clean:
            return False

    # All terms were found
    return True
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/search", methods=["POST"])
|
@app.route("/api/search", methods=["POST"])
|
||||||
def api_search():
|
def api_search():
|
||||||
"""API endpoint for searching listings - returns count and starts prefetch"""
|
"""API endpoint for searching listings - returns count and starts prefetch"""
|
||||||
@ -141,6 +162,7 @@ def api_search():
|
|||||||
num_listings = data.get("num_listings", 25)
|
num_listings = data.get("num_listings", 25)
|
||||||
min_price = data.get("min_price", 0)
|
min_price = data.get("min_price", 0)
|
||||||
max_price = data.get("max_price", 1000000000)
|
max_price = data.get("max_price", 1000000000)
|
||||||
|
power_search_description = data.get("search_term_desc", "")
|
||||||
|
|
||||||
if not search_term:
|
if not search_term:
|
||||||
return jsonify({"error": "Search term is required"}), 400
|
return jsonify({"error": "Search term is required"}), 400
|
||||||
@ -163,6 +185,7 @@ def api_search():
|
|||||||
# Store session with creation timestamp
|
# Store session with creation timestamp
|
||||||
scrape_sessions[session_id] = {
|
scrape_sessions[session_id] = {
|
||||||
"urls": listing_urls,
|
"urls": listing_urls,
|
||||||
|
"power_search_desc": power_search_description,
|
||||||
"total": len(listing_urls),
|
"total": len(listing_urls),
|
||||||
"scraped": 0,
|
"scraped": 0,
|
||||||
"listings": [],
|
"listings": [],
|
||||||
@ -207,6 +230,11 @@ def api_scrape(session_id):
|
|||||||
|
|
||||||
if len(session["listings"]) > 0:
|
if len(session["listings"]) > 0:
|
||||||
listing = session["listings"].pop(0)
|
listing = session["listings"].pop(0)
|
||||||
|
if session["power_search_desc"]:
|
||||||
|
if not filter_listing_search_description(
|
||||||
|
session["power_search_desc"], listing["desc"]
|
||||||
|
):
|
||||||
|
listing = None
|
||||||
else:
|
else:
|
||||||
listing = None
|
listing = None
|
||||||
|
|
||||||
|
|||||||
@ -28,12 +28,14 @@ body {
|
|||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 8px;
|
gap: 8px;
|
||||||
|
position: relative;
|
||||||
}
|
}
|
||||||
|
|
||||||
.search-compact {
|
.search-compact {
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 8px;
|
gap: 8px;
|
||||||
|
position: relative;
|
||||||
}
|
}
|
||||||
|
|
||||||
.search-compact input[type="text"] {
|
.search-compact input[type="text"] {
|
||||||
@ -46,6 +48,84 @@ body {
|
|||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.input-with-info {
|
||||||
|
position: relative;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.input-with-info input {
|
||||||
|
width: 100%;
|
||||||
|
padding: 10px 40px 10px 10px;
|
||||||
|
border: 1px solid #3a3a3a;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 14px;
|
||||||
|
background: #2a2a2a;
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-icon {
|
||||||
|
position: absolute;
|
||||||
|
right: 8px;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
color: #0ea5e9;
|
||||||
|
font-size: 18px;
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 4px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
width: 28px;
|
||||||
|
height: 28px;
|
||||||
|
border-radius: 50%;
|
||||||
|
transition: background 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-icon:hover {
|
||||||
|
background: rgba(14, 165, 233, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip {
|
||||||
|
display: none;
|
||||||
|
position: absolute;
|
||||||
|
top: calc(100% + 4px);
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
background: #2a2a2a;
|
||||||
|
border: 1px solid #0ea5e9;
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 12px;
|
||||||
|
z-index: 10000;
|
||||||
|
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip.show {
|
||||||
|
display: block !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip-content {
|
||||||
|
font-size: 12px;
|
||||||
|
line-height: 1.5;
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip-content strong {
|
||||||
|
display: block;
|
||||||
|
color: #0ea5e9;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
font-size: 13px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip-content p {
|
||||||
|
margin: 6px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip-content em {
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
.search-row-compact {
|
.search-row-compact {
|
||||||
display: grid;
|
display: grid;
|
||||||
grid-template-columns: 1fr 1fr 70px;
|
grid-template-columns: 1fr 1fr 70px;
|
||||||
@ -63,6 +143,17 @@ body {
|
|||||||
min-width: 0;
|
min-width: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.search-row-compact input[type="number"] {
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
.search-row-compact input[type="number"]::after {
|
||||||
|
content: '€';
|
||||||
|
position: absolute;
|
||||||
|
right: 8px;
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
.search-actions-compact {
|
.search-actions-compact {
|
||||||
display: grid;
|
display: grid;
|
||||||
grid-template-columns: 1fr 1fr;
|
grid-template-columns: 1fr 1fr;
|
||||||
@ -449,6 +540,20 @@ body {
|
|||||||
padding: 12px;
|
padding: 12px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.input-with-info {
|
||||||
|
width: 250px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.input-with-info input {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.info-tooltip {
|
||||||
|
left: 0;
|
||||||
|
right: auto;
|
||||||
|
width: 350px;
|
||||||
|
}
|
||||||
|
|
||||||
.search-row-compact {
|
.search-row-compact {
|
||||||
width: auto;
|
width: auto;
|
||||||
display: flex;
|
display: flex;
|
||||||
|
|||||||
@ -14,7 +14,12 @@
|
|||||||
<!-- Search Bar -->
|
<!-- Search Bar -->
|
||||||
<div class="search-bar">
|
<div class="search-bar">
|
||||||
<div class="search-compact">
|
<div class="search-compact">
|
||||||
<input type="text" id="searchTerm" placeholder="z.B. Gravelbike">
|
<input type="text" id="searchTerm" placeholder="Suchbegriff z.B. Gravelbike">
|
||||||
|
<div class="input-with-info">
|
||||||
|
<input type="text" id="powerSearchDescTerm" placeholder="Beschreibung z.B. Schutzblech">
|
||||||
|
<button class="info-icon" id="infoIcon" type="button"
|
||||||
|
title="Gib mehrere Suchbegriffe ein, getrennt durch Semikolons. Alle eingegebenen Begriffe müssen in der Beschreibung vorkommen.">ⓘ</button>
|
||||||
|
</div>
|
||||||
<div class="search-row-compact">
|
<div class="search-row-compact">
|
||||||
<input type="number" id="minPrice" placeholder="Min €" value="" min="0" max="1000000000">
|
<input type="number" id="minPrice" placeholder="Min €" value="" min="0" max="1000000000">
|
||||||
<input type="number" id="maxPrice" placeholder="Max €" value="" min="0" max="1000000000">
|
<input type="number" id="maxPrice" placeholder="Max €" value="" min="0" max="1000000000">
|
||||||
|
|||||||
@ -72,6 +72,7 @@ async function searchListings() {
|
|||||||
const maxPriceInput = document.getElementById('maxPrice').value;
|
const maxPriceInput = document.getElementById('maxPrice').value;
|
||||||
const maxPrice = maxPriceInput ? parseInt(maxPriceInput) : 1000000000;
|
const maxPrice = maxPriceInput ? parseInt(maxPriceInput) : 1000000000;
|
||||||
const numListings = parseInt(document.getElementById('numListings').value) || 25;
|
const numListings = parseInt(document.getElementById('numListings').value) || 25;
|
||||||
|
const powerSearchDescriptionTerm = document.getElementById('powerSearchDescTerm').value.trim();
|
||||||
|
|
||||||
if (!searchTerm) {
|
if (!searchTerm) {
|
||||||
showStatus('Bitte Suchbegriff eingeben', 'error');
|
showStatus('Bitte Suchbegriff eingeben', 'error');
|
||||||
@ -96,7 +97,8 @@ async function searchListings() {
|
|||||||
search_term: searchTerm,
|
search_term: searchTerm,
|
||||||
min_price: minPrice,
|
min_price: minPrice,
|
||||||
max_price: maxPrice,
|
max_price: maxPrice,
|
||||||
num_listings: numListings
|
num_listings: numListings,
|
||||||
|
search_term_desc: powerSearchDescriptionTerm
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user