2022-05-31 09:57:03 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""" Author: Hendrik Schutter, mail@hendrikschutter.com
Date of creation : 2022 / 05 / 31
Date of last modification : 2022 / 05 / 31
"""
from bs4 import BeautifulSoup
import datetime
from tinydb import TinyDB , Query
import urllib3
import sys
2022-05-31 14:16:15 +02:00
import helper
2022-05-31 09:57:03 +02:00
# Silence urllib3's InsecureRequestWarning for the whole process (runs at import time).
# NOTE(review): presumably a leftover from unverified HTTPS requests -- confirm it is still needed.
urllib3 . disable_warnings ( urllib3 . exceptions . InsecureRequestWarning )
def make_soup(url):
    """Download ``url`` with an HTTP GET and parse the body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` document built from the raw response bytes using
        the ``lxml`` parser.
    """
    # The user agent is supplied by the project helper on every call.
    # NOTE(review): the helper name suggests a randomized value -- confirm.
    user_agent = {'user-agent': helper.get_random_user_agent()}
    http = urllib3.PoolManager(10, headers=user_agent)
    r = http.request("GET", url)
    return BeautifulSoup(r.data, 'lxml')
2022-05-31 18:41:27 +02:00
def search(search_term, max_pages):
    """Collect eBay.de listing URLs that match a search term.

    Result pages are fetched one after another, starting at page 1, until
    either ``max_pages`` pages have been read or a page contributes no
    listing that was not already collected.

    Args:
        search_term: Free-text query. It is URL-encoded before being placed
            into the request URL.
        max_pages: Upper bound on the number of result pages to fetch. At
            least one page is always requested.

    Returns:
        A set of listing URLs with their query strings stripped.
    """
    # Imported locally so this function does not rely on a module-level import.
    from urllib.parse import quote_plus

    # quote_plus turns spaces into "+" (same as the previous replace call) and
    # also escapes characters such as "&" or "#" that would break the query.
    encoded_term = quote_plus(search_term)

    found_listings = set()
    page_counter = 1
    while True:
        # LH_BIN=1: presumably "Buy It Now" only (earlier note: "No auction").
        # LH_PrefLoc=3: presumably a location filter -- TODO confirm.
        url = (
            'https://www.ebay.de/sch/i.html?_from=R40&_nkw=' + encoded_term
            + '&_sacat=0&LH_TitleDesc=0&LH_BIN=1&rt=nc&LH_PrefLoc=3&_pgn='
            + str(page_counter)
        )
        soup = make_soup(url)
        results = soup.find_all("div", class_="s-item__info clearfix")

        listings_before = len(found_listings)
        for result in results:
            # A result without an anchor or without an href is skipped.
            # Subscripting a missing anchor would raise TypeError.
            anchor = result.a
            if anchor is None:
                continue
            href = anchor.get('href')
            if not href:
                continue
            # Drop the tracking query string so equal listings deduplicate.
            found_listings.add(href.split("?", 1)[0])

        # Number of listings this page added that were not already known.
        result_page_added = len(found_listings) - listings_before

        # Stop when the page brought nothing new or the page budget is used
        # up. ">=" keeps the loop bounded even for max_pages < 1.
        if result_page_added == 0 or page_counter >= max_pages:
            break
        page_counter += 1

    return found_listings
2022-05-31 09:57:03 +02:00
if __name__ == "__main__":
    # Manual smoke test: print the listing URLs found for a sample query.
    print(search("LC Power LC6450 V2.2 PC Netzteil 450W Watt", max_pages=4))
2022-05-31 18:41:27 +02:00
2022-05-31 09:57:03 +02:00