diff --git a/le_fooding_scraping.py b/le_fooding_scraping.py index 7c3c03e..536daed 100644 --- a/le_fooding_scraping.py +++ b/le_fooding_scraping.py @@ -96,13 +96,13 @@ def get_prices_and_addresses(names): adress = '' soup = '' for name in names: + ic(name) if not name.isascii(): ic() x = 'Not ASCII' prices.append(x) else: new_url = 'https://lefooding.com/restaurants/' + name.lower() - ic(new_url) new_page = requests.get(new_url) x = 0 match str(new_page): @@ -111,32 +111,26 @@ def get_prices_and_addresses(names): x = get_price(new_page) soup = BeautifulSoup(new_page.text, features='html.parser') adress = get_adress(soup) - ic(adress) case '': ic() new_url = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris' new_page = requests.get(new_url) - ic(new_url) match str(new_page): case '': ic() x = get_price(new_page) soup = BeautifulSoup(new_page.text, features='html.parser') adress = get_adress(soup) - ic(adress) case '': ic() x = '' for i in range(1, 21): - ic() new_url2 = new_url + '-' + str(i) new_page = requests.get(new_url2) if str(new_page) == '': - ic() x = get_price(new_page) soup = BeautifulSoup(new_page.text, features='html.parser') adress = get_adress(soup) - ic(adress) break prices.append(x) @@ -164,7 +158,6 @@ def scrap_page(url): names = get_names(soup) prices, addresses = get_prices_and_addresses(names) - ic(prices, addresses) df = pd.DataFrame(list(zip(names, addresses, prices)), columns=['Name', 'Address', 'Price']) for i in range(len(df)): @@ -196,4 +189,3 @@ def complete_scraping(): df = df[~df['Name'].str.contains('style="display')] return df -