diff --git a/le_fooding_scraping.py b/le_fooding_scraping.py index 4cf7e55..7c3c03e 100644 --- a/le_fooding_scraping.py +++ b/le_fooding_scraping.py @@ -169,12 +169,11 @@ def scrap_page(url): df = pd.DataFrame(list(zip(names, addresses, prices)), columns=['Name', 'Address', 'Price']) for i in range(len(df)): df.loc[i, 'Date'] = datetime.date.today() - print(df.to_string()) return df # Main function -def main(): +def complete_scraping(): ic() # Initialization starting_url = 'https://lefooding.com/recherche/restaurant/place/paris-8246' @@ -186,7 +185,6 @@ def main(): temporary_df = scrap_page(starting_url) df = pd.concat([df, temporary_df], ignore_index=True) - print(df.to_string()) # Loop through the others pages for i in range(2, number_of_pages+1): @@ -194,11 +192,8 @@ def main(): new_url = starting_url + '?page=' + str(i) temporary_df = scrap_page(new_url) df = pd.concat([df, temporary_df], ignore_index=True) - print(df.to_string()) + + df = df[~df['Name'].str.contains('style="display')] return df - -df = main() - -df.to_csv('/Users/oliviermeyer/Desktop/le_fooding_test.csv', index=False, header=True, escapechar='\\')