Actualiser le_fooding_scraping.py
This commit is contained in:
parent
7e3708a7f9
commit
5740be9bbc
|
@ -96,13 +96,13 @@ def get_prices_and_addresses(names):
|
|||
adress = ''
|
||||
soup = ''
|
||||
for name in names:
|
||||
ic(name)
|
||||
if not name.isascii():
|
||||
ic()
|
||||
x = 'Not ASCII'
|
||||
prices.append(x)
|
||||
else:
|
||||
new_url = 'https://lefooding.com/restaurants/' + name.lower()
|
||||
ic(new_url)
|
||||
new_page = requests.get(new_url)
|
||||
x = 0
|
||||
match str(new_page):
|
||||
|
@ -111,32 +111,26 @@ def get_prices_and_addresses(names):
|
|||
x = get_price(new_page)
|
||||
soup = BeautifulSoup(new_page.text, features='html.parser')
|
||||
adress = get_adress(soup)
|
||||
ic(adress)
|
||||
case '<Response [404]>':
|
||||
ic()
|
||||
new_url = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris'
|
||||
new_page = requests.get(new_url)
|
||||
ic(new_url)
|
||||
match str(new_page):
|
||||
case '<Response [200]>':
|
||||
ic()
|
||||
x = get_price(new_page)
|
||||
soup = BeautifulSoup(new_page.text, features='html.parser')
|
||||
adress = get_adress(soup)
|
||||
ic(adress)
|
||||
case '<Response [404]>':
|
||||
ic()
|
||||
x = '<Response [404]>'
|
||||
for i in range(1, 21):
|
||||
ic()
|
||||
new_url2 = new_url + '-' + str(i)
|
||||
new_page = requests.get(new_url2)
|
||||
if str(new_page) == '<Response [200]>':
|
||||
ic()
|
||||
x = get_price(new_page)
|
||||
soup = BeautifulSoup(new_page.text, features='html.parser')
|
||||
adress = get_adress(soup)
|
||||
ic(adress)
|
||||
break
|
||||
|
||||
prices.append(x)
|
||||
|
@ -164,7 +158,6 @@ def scrap_page(url):
|
|||
|
||||
names = get_names(soup)
|
||||
prices, addresses = get_prices_and_addresses(names)
|
||||
ic(prices, addresses)
|
||||
|
||||
df = pd.DataFrame(list(zip(names, addresses, prices)), columns=['Name', 'Address', 'Price'])
|
||||
for i in range(len(df)):
|
||||
|
@ -196,4 +189,3 @@ def complete_scraping():
|
|||
df = df[~df['Name'].str.contains('style="display')]
|
||||
|
||||
return df
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user