77 lines
2.3 KiB
Python
77 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
""" Author: Hendrik Schutter, mail@hendrikschutter.com
|
|
Date of creation: 2022/05/31
|
|
Date of last modification: 2024/08/18
|
|
"""
|
|
|
|
from bs4 import BeautifulSoup
|
|
import datetime
|
|
from tinydb import TinyDB, Query
|
|
import urllib3
|
|
import sys
|
|
import helper
|
|
|
|
# Suppress urllib3's InsecureRequestWarning messages.
# NOTE(review): no request in the visible code disables certificate
# verification -- confirm whether this suppression is still required.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
|
|
def make_soup(url):
    """Download *url* with a GET request and parse the response body.

    The request carries a ``user-agent`` header whose value comes from
    ``helper.get_random_user_agent()``.

    Args:
        url: Address of the page to download.

    Returns:
        BeautifulSoup: The response body parsed with the "lxml" parser.
    """
    headers = {"user-agent": helper.get_random_user_agent()}

    # The pool manager serves exactly one request, so it is used as a context
    # manager: its connection pools are cleared on exit instead of lingering
    # after every call.
    with urllib3.PoolManager(10, headers=headers) as http:
        response = http.request("GET", url)
        # Parse inside the block so the body is consumed before the pools
        # are cleared.
        return BeautifulSoup(response.data, "lxml")
|
|
|
|
|
|
def search(search_term, max_pages):
    """Collect listing URLs from the ebay.de search results for a term.

    Result pages are requested one after another, starting at page 1.
    Paging stops as soon as a page contributes no listing that was not
    already collected, or once page number ``max_pages`` has been processed.

    Query parameters used:
        * ``LH_BIN=1``     -- buy-it-now offers only (no auctions)
        * ``LH_PrefLoc=3`` -- location filter ("EU only" per the original
          author's note)
        * ``_pgn``         -- result page number

    NOTE(review): the original comments also mention "sort by newest
    listing" and "60 listings in one result page", but the request URL
    carries no sort or page-size parameter -- confirm the intended query.

    Args:
        search_term: Free-text search phrase; it is URL-encoded here.
        max_pages: Highest result page number to request. At least one page
            is always requested. ``None`` means no page limit.

    Returns:
        set: Listing URLs with their query string removed.
    """
    from urllib.parse import quote_plus

    # quote_plus turns spaces into "+" (as before) and additionally escapes
    # characters such as "&" or "#" that would otherwise corrupt the query.
    encoded_term = quote_plus(search_term)

    found_listings = set()
    page_counter = 1

    while True:
        url = (
            "https://www.ebay.de/sch/i.html?_from=R40&_nkw="
            + encoded_term
            + "&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn="
            + str(page_counter)
        )

        soup = make_soup(url)
        results = soup.find_all("div", class_="s-item__info clearfix")

        known_before = len(found_listings)
        for result in results:
            # Skip result containers without a usable link instead of
            # failing on them.
            anchor = result.a
            if anchor is None:
                continue
            href = anchor.get("href")
            if not href:
                continue
            # Drop the tracking query string so the same listing always
            # maps to the same URL.
            found_listings.add(href.split("?", 1)[0])

        # Number of listings this page contributed that were not yet known.
        result_page_added = len(found_listings) - known_before

        if result_page_added == 0:
            break
        if max_pages is not None and page_counter >= max_pages:
            break
        page_counter += 1

    return found_listings
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual run: search for one sample product and print the collected URLs.
    listings = search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages=4)
    print(listings)
|