scrape search for listing
This commit is contained in:
72
search_kleinanzeigen.py
Normal file
72
search_kleinanzeigen.py
Normal file
@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
||||
Date of creation: 2025/11/24
|
||||
Date of last modification: 2025/11/24
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import datetime
|
||||
from tinydb import TinyDB, Query
|
||||
import urllib3
|
||||
import sys
|
||||
import helper
|
||||
|
||||
# Suppress urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): nothing visible in this file turns certificate verification
# off, so this suppression may be a leftover — confirm it is still needed.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
def make_soup(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    The GET request is sent through a fresh ``urllib3.PoolManager`` whose
    ``user-agent`` header is whatever ``helper.get_random_user_agent()``
    returns. The response body is parsed with the ``lxml`` parser; the HTTP
    status code is not inspected.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes.
    """
    request_headers = {"user-agent": helper.get_random_user_agent()}
    pool = urllib3.PoolManager(num_pools=10, headers=request_headers)
    markup = pool.request("GET", url).data
    return BeautifulSoup(markup, "lxml")
|
||||
|
||||
|
||||
def search(search_term, max_pages, min_price, max_price):
    """Collect listing URLs from kleinanzeigen.de search result pages.

    The request URL restricts results to private sellers
    (``anbieter:privat``) and offerings (``anzeige:angebote``) inside the
    given price range. The original author's notes also state that results
    are sorted by newest listing and are not geo-restricted; the URL built
    here carries no explicit sort or distance parameter, so that presumably
    relies on the site's defaults.

    Args:
        search_term: Free-text query. Spaces are replaced by ``-`` and the
            result is percent-encoded so characters such as ``/``, ``?`` or
            ``#`` cannot change the structure of the request URL.
        max_pages: Highest result page to request; pages ``1..max_pages``
            are fetched in order. Values below 1 fetch nothing.
        min_price: Lower price bound, inserted into the URL as text.
        max_price: Upper price bound, inserted into the URL as text.

    Returns:
        A set of listing URLs, each formed as the site base URL followed by
        the ``href`` of the first link inside a result item.
    """
    # Function-scope import so the file's import header does not need to change.
    from urllib.parse import quote

    base_url = "https://www.kleinanzeigen.de"
    term_slug = quote(search_term.replace(" ", "-"), safe="")
    found_listings = set()

    for page_counter in range(1, max_pages + 1):
        page_url = (
            f"{base_url}/s-anbieter:privat/anzeige:angebote"
            f"/preis:{min_price}:{max_price}/seite:{page_counter}/"
            f"{term_slug}/k0"
        )
        print("Web Page: ", page_url)

        soup = make_soup(page_url)
        results = soup.find_all("li", class_="ad-listitem fully-clickable-card")

        # Stop at the first page that contains no matching result items.
        if not results:
            break

        for result in results:
            # ``result.a`` is None when the item contains no link at all;
            # subscripting it would raise TypeError, which the previous
            # (AttributeError, KeyError) handler did not catch.
            anchor = result.a
            href = anchor.get("href") if anchor is not None else None
            if not href:
                # Best effort: skip items without a usable link.
                continue
            found_listings.add(base_url + href)

    return found_listings
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: fetch the first result page for "Fahrrad" priced
    # between 24 and 42, then print the collected URLs and their count.
    demo_listings = search("Fahrrad", max_pages=1, min_price=24, max_price=42)
    listing_count = len(demo_listings)
    print(demo_listings)
    print(listing_count)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user