#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation: 2025/11/24
Date of last modification: 2025/11/24
"""

from bs4 import BeautifulSoup
from datetime import datetime
from tinydb import TinyDB, Query
import urllib3
import sys
import helper
from tqdm import tqdm

# Scraping setups frequently sit behind intercepting proxies / self-signed
# certs; silence the per-request TLS warning noise.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def make_soup(url):
    """Fetch *url* with a randomized User-Agent and parse it with lxml.

    Returns a BeautifulSoup document. Network errors propagate as
    urllib3 exceptions to the caller.
    """
    user_agent = {"user-agent": helper.get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    response = http.request("GET", url)
    return BeautifulSoup(response.data, "lxml")


def search(search_term, max_pages, min_price, max_price):
    """Collect listing URLs for *search_term* from kleinanzeigen.de.

    Search filters baked into the URL: sorted by newest listing, no geo
    distance, private sellers only, offerings only, price between
    *min_price* and *max_price* (EUR). Walks result pages
    1..*max_pages* and stops early at the first page without results.

    Returns a set of absolute listing URLs (deduplicated).
    """
    base_url = "https://www.kleinanzeigen.de"
    found_listings = set()

    for page_counter in range(1, max_pages + 1):
        # Note: distinct name from the per-item href below — the original
        # reused one variable for both, which was easy to misread.
        page_url = (
            base_url
            + "/s-anbieter:privat/anzeige:angebote/preis:"
            + str(min_price)
            + ":"
            + str(max_price)
            + "/seite:"
            + str(page_counter)
            + "/"
            + search_term.replace(" ", "-")
            + "/k0"
        )
        print("Web Page: ", page_url)
        soup = make_soup(page_url)

        results = soup.find_all("li", class_="ad-listitem fully-clickable-card")
        if not results:
            # Past the last result page — stop paginating.
            break

        for result in results:
            try:
                found_listings.add(base_url + result.a["href"])
            except (AttributeError, KeyError):
                # List item without a link anchor (e.g. an ad slot) — skip.
                pass

    return found_listings


def scrape_listing(url):
    """Scrape a single listing page and return its data as a dict.

    Keys: title (str), price (int, EUR), id (int), zip_code (int),
    address (str), dateadded (datetime), first_image (str, URL),
    url (str, the input).

    Raises AttributeError / KeyError / ValueError / IndexError when the
    page layout does not match the expected structure; the caller is
    expected to handle scrape failures per listing.
    """
    soup = make_soup(url)

    # "450 € VB" -> 450. Also strip the German thousands separator so
    # "1.300 €" parses as 1300 instead of raising ValueError.
    price_text = soup.find("h2", class_="boxedarticle--price").text.strip()
    price = int(price_text.split(" ")[0].replace(".", ""))

    # Locality text is "<zip> <rest of address>"; fetch the span once and
    # derive both fields from it.
    locality = (
        soup.find_all("div", class_="boxedarticle--details--full")[0]
        .find("span", id="viewad-locality")
        .text.strip()
    )

    listing = {
        "title": soup.find("h1", class_="boxedarticle--title").text.strip(),
        "price": price,
        # Second <li> of the detail list holds the numeric listing id.
        "id": int(
            soup.find("ul", class_="flexlist text-light-800")
            .find_all("li", recursive=False)[1]
            .text
        ),
        "zip_code": int(locality.split(" ")[0]),
        "address": locality.partition(" ")[2],
        "dateadded": datetime.strptime(
            soup.find_all("div", class_="boxedarticle--details--full")[1].span.text,
            "%d.%m.%Y",
        ),
        "first_image": soup.find("div", class_="galleryimage-element current").img[
            "src"
        ],
        "url": url,
    }
    return listing


if __name__ == "__main__":
    found_listings = search("Gravelbike", max_pages=5, min_price=300, max_price=900)
    print(found_listings)
    print(len(found_listings))

    for url in tqdm(found_listings, desc="Scraping listings"):
        try:
            listing = scrape_listing(url)
        except Exception as e:
            # Deleted listings and layout drift are expected in the wild;
            # report the failure and keep going.
            print("An error occurred:", e)
            print("URL: " + url)