Update le_fooding_scraping.py
This commit is contained in:
parent 7e3708a7f9
commit 5740be9bbc
@@ -96,13 +96,13 @@ def get_prices_and_addresses(names):
     adress = ''
     soup = ''
     for name in names:
+        ic(name)
         if not name.isascii():
             ic()
             x = 'Not ASCII'
             prices.append(x)
         else:
             new_url = 'https://lefooding.com/restaurants/' + name.lower()
-            ic(new_url)
             new_page = requests.get(new_url)
             x = 0
             match str(new_page):
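Side note, not part of this commit: the code branches on str(new_page), which requests renders as '<Response [200]>' or '<Response [404]>'. A minimal sketch of the same branching that matches the integer response.status_code instead (the function name fetch_page is invented for illustration):

import requests

def fetch_page(url):
    # str(response) only mirrors response.status_code, so matching the
    # integer expresses the same 200/404 branching directly.
    response = requests.get(url)
    match response.status_code:
        case 200:
            return response      # page exists, ready for BeautifulSoup parsing
        case 404:
            return None          # slug not found under this URL scheme
        case _:
            response.raise_for_status()  # surface any other HTTP error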
@@ -111,32 +111,26 @@ def get_prices_and_addresses(names):
                     x = get_price(new_page)
                     soup = BeautifulSoup(new_page.text, features='html.parser')
                     adress = get_adress(soup)
-                    ic(adress)
                 case '<Response [404]>':
                     ic()
                     new_url = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris'
                     new_page = requests.get(new_url)
-                    ic(new_url)
                     match str(new_page):
                         case '<Response [200]>':
                             ic()
                             x = get_price(new_page)
                             soup = BeautifulSoup(new_page.text, features='html.parser')
                             adress = get_adress(soup)
-                            ic(adress)
                         case '<Response [404]>':
                             ic()
                             x = '<Response [404]>'
                             for i in range(1, 21):
-                                ic()
                                 new_url2 = new_url + '-' + str(i)
                                 new_page = requests.get(new_url2)
                                 if str(new_page) == '<Response [200]>':
-                                    ic()
                                     x = get_price(new_page)
                                     soup = BeautifulSoup(new_page.text, features='html.parser')
                                     adress = get_adress(soup)
-                                    ic(adress)
                                     break
 
         prices.append(x)
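Side note, not part of this commit: the hunk above tries three URL shapes in order — the plain slug, the 'restaurant-<name>-paris' form, then that form with suffixes -1 through -20 — and keeps the first page that answers 200. A compact sketch of that fallback order under the same assumptions (the helper names candidate_urls and first_live_page are invented for illustration):

import requests

def candidate_urls(name):
    # Same fallback order as the scraper: plain slug first, then the
    # 'restaurant-...-paris' form, then numbered variants of that form.
    base = 'https://lefooding.com/restaurants/' + name.lower()
    fallback = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris'
    yield base
    yield fallback
    for i in range(1, 21):
        yield fallback + '-' + str(i)

def first_live_page(name):
    # Return the first candidate that answers 200, or None if they all fail.
    for url in candidate_urls(name):
        page = requests.get(url)
        if page.status_code == 200:
            return page
    return None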
@@ -164,7 +158,6 @@ def scrap_page(url):
 
     names = get_names(soup)
     prices, addresses = get_prices_and_addresses(names)
-    ic(prices, addresses)
 
     df = pd.DataFrame(list(zip(names, addresses, prices)), columns=['Name', 'Address', 'Price'])
     for i in range(len(df)):
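Side note, not part of this commit: the DataFrame above is built from three parallel lists, so names, addresses and prices must stay the same length and in the same order. A self-contained illustration with made-up rows:

import pandas as pd

names = ['Chez Exemple', 'Bistro Fictif']            # invented sample data
addresses = ['1 rue Imaginaire, Paris', '2 avenue Fictive, Paris']
prices = ['25 €', '35 €']

# zip() pairs the i-th entry of each list into one row.
df = pd.DataFrame(list(zip(names, addresses, prices)),
                  columns=['Name', 'Address', 'Price'])
print(df)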
@@ -196,4 +189,3 @@ def complete_scraping():
     df = df[~df['Name'].str.contains('style="display')]
 
     return df
-
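Side note, not part of this commit: the filter above keeps only the rows whose Name does not contain the leftover HTML fragment 'style="display', by negating the boolean mask from str.contains with ~. A small illustration with invented rows:

import pandas as pd

df = pd.DataFrame({'Name': ['Chez Exemple', '<span style="display:none">x</span>'],
                   'Price': ['25 €', '30 €']})

# ~mask flips True/False, so rows matching the HTML fragment are dropped.
clean = df[~df['Name'].str.contains('style="display')]
print(clean)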