Actualiser le_fooding_scraping.py

This commit is contained in:
Olivier MEYER 2024-06-27 11:46:55 +02:00
parent 7e3708a7f9
commit 5740be9bbc

View File

@@ -96,13 +96,13 @@ def get_prices_and_addresses(names):
adress = ''
soup = ''
for name in names:
ic(name)
if not name.isascii():
ic()
x = 'Not ASCII'
prices.append(x)
else:
new_url = 'https://lefooding.com/restaurants/' + name.lower()
ic(new_url)
new_page = requests.get(new_url)
x = 0
match str(new_page):
@@ -111,32 +111,26 @@ def get_prices_and_addresses(names):
x = get_price(new_page)
soup = BeautifulSoup(new_page.text, features='html.parser')
adress = get_adress(soup)
ic(adress)
case '<Response [404]>':
ic()
new_url = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris'
new_page = requests.get(new_url)
ic(new_url)
match str(new_page):
case '<Response [200]>':
ic()
x = get_price(new_page)
soup = BeautifulSoup(new_page.text, features='html.parser')
adress = get_adress(soup)
ic(adress)
case '<Response [404]>':
ic()
x = '<Response [404]>'
for i in range(1, 21):
ic()
new_url2 = new_url + '-' + str(i)
new_page = requests.get(new_url2)
if str(new_page) == '<Response [200]>':
ic()
x = get_price(new_page)
soup = BeautifulSoup(new_page.text, features='html.parser')
adress = get_adress(soup)
ic(adress)
break
prices.append(x)
@@ -164,7 +158,6 @@ def scrap_page(url):
names = get_names(soup)
prices, addresses = get_prices_and_addresses(names)
ic(prices, addresses)
df = pd.DataFrame(list(zip(names, addresses, prices)), columns=['Name', 'Address', 'Price'])
for i in range(len(df)):
@@ -196,4 +189,3 @@ def complete_scraping():
df = df[~df['Name'].str.contains('style="display')]
return df