Update le_fooding_scraping.py
This commit is contained in:
parent 7e3708a7f9
commit 5740be9bbc
@@ -96,13 +96,13 @@ def get_prices_and_addresses(names):
     adress = ''
     soup = ''
     for name in names:
+        ic(name)
         if not name.isascii():
             ic()
             x = 'Not ASCII'
             prices.append(x)
         else:
             new_url = 'https://lefooding.com/restaurants/' + name.lower()
-            ic(new_url)
             new_page = requests.get(new_url)
             x = 0
             match str(new_page):
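Side note, not part of this commit: the code branches on str(new_page), which requests renders as '<Response [200]>' or '<Response [404]>'. A minimal sketch of the same branching that matches the integer response.status_code instead (the function name fetch_page is invented for illustration):

import requests

def fetch_page(url):
    # str(response) only mirrors response.status_code, so matching the
    # integer expresses the same 200/404 branching directly.
    response = requests.get(url)
    match response.status_code:
        case 200:
            return response      # page exists, ready for BeautifulSoup parsing
        case 404:
            return None          # slug not found under this URL scheme
        case _:
            response.raise_for_status()  # surface any other HTTP error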
@@ -111,32 +111,26 @@ def get_prices_and_addresses(names):
                     x = get_price(new_page)
                     soup = BeautifulSoup(new_page.text, features='html.parser')
                     adress = get_adress(soup)
-                    ic(adress)
                 case '<Response [404]>':
                     ic()
                     new_url = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris'
                     new_page = requests.get(new_url)
-                    ic(new_url)
                     match str(new_page):
                         case '<Response [200]>':
                             ic()
                             x = get_price(new_page)
                             soup = BeautifulSoup(new_page.text, features='html.parser')
                             adress = get_adress(soup)
-                            ic(adress)
                         case '<Response [404]>':
                             ic()
                             x = '<Response [404]>'
                             for i in range(1, 21):
-                                ic()
                                 new_url2 = new_url + '-' + str(i)
                                 new_page = requests.get(new_url2)
                                 if str(new_page) == '<Response [200]>':
-                                    ic()
                                     x = get_price(new_page)
                                     soup = BeautifulSoup(new_page.text, features='html.parser')
                                     adress = get_adress(soup)
-                                    ic(adress)
                                     break
 
         prices.append(x)
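Side note, not part of this commit: the hunk above tries three URL shapes in order — the plain slug, the 'restaurant-<name>-paris' form, then that form with suffixes -1 through -20 — and keeps the first page that answers 200. A compact sketch of that fallback order under the same assumptions (the helper names candidate_urls and first_live_page are invented for illustration):

import requests

def candidate_urls(name):
    # Same fallback order as the scraper: plain slug first, then the
    # 'restaurant-...-paris' form, then numbered variants of that form.
    base = 'https://lefooding.com/restaurants/' + name.lower()
    fallback = 'https://lefooding.com/restaurants/restaurant-' + name.lower() + '-paris'
    yield base
    yield fallback
    for i in range(1, 21):
        yield fallback + '-' + str(i)

def first_live_page(name):
    # Return the first candidate that answers 200, or None if they all fail.
    for url in candidate_urls(name):
        page = requests.get(url)
        if page.status_code == 200:
            return page
    return None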
@@ -164,7 +158,6 @@ def scrap_page(url):
 
     names = get_names(soup)
     prices, addresses = get_prices_and_addresses(names)
-    ic(prices, addresses)
 
     df = pd.DataFrame(list(zip(names, addresses, prices)), columns=['Name', 'Address', 'Price'])
     for i in range(len(df)):
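Side note, not part of this commit: the DataFrame above is built from three parallel lists, so names, addresses and prices must stay the same length and in the same order. A self-contained illustration with made-up rows:

import pandas as pd

names = ['Chez Exemple', 'Bistro Fictif']            # invented sample data
addresses = ['1 rue Imaginaire, Paris', '2 avenue Fictive, Paris']
prices = ['25 €', '35 €']

# zip() pairs the i-th entry of each list into one row.
df = pd.DataFrame(list(zip(names, addresses, prices)),
                  columns=['Name', 'Address', 'Price'])
print(df)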
@@ -196,4 +189,3 @@ def complete_scraping():
     df = df[~df['Name'].str.contains('style="display')]
 
     return df
-
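Side note, not part of this commit: the filter above keeps only the rows whose Name does not contain the leftover HTML fragment 'style="display', by negating the boolean mask from str.contains with ~. A small illustration with invented rows:

import pandas as pd

df = pd.DataFrame({'Name': ['Chez Exemple', '<span style="display:none">x</span>'],
                   'Price': ['25 €', '30 €']})

# ~mask flips True/False, so rows matching the HTML fragment are dropped.
clean = df[~df['Name'].str.contains('style="display')]
print(clean)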