import time
from urllib.parse import quote

from bs4 import BeautifulSoup
from icecream import ic
from selenium import webdriver

# Base of every Google search URL; the query string is appended to it.
_GOOGLE_SEARCH_URL = 'https://www.google.com/search?hl=en&q='
# Pause (in seconds) between loading a page and reading its source.
_PAGE_LOAD_DELAY_S = 0.1


def _build_search_url(name, address):
    """Return the Google search URL for a restaurant name and address.

    Spaces are replaced by hyphens and the two parts are concatenated
    directly, then the query is percent-encoded so that characters such as
    '&', '#' or '+' cannot truncate or alter it.

    Raises:
        AttributeError: if ``name`` or ``address`` has no ``replace`` method
            (i.e. is not a string).
    """
    # NOTE(review): name and address are joined without any separator, so the
    # last word of the name fuses with the first token of the address.
    # Kept as-is to leave the queries unchanged -- confirm whether intended.
    query = name.replace(' ', '-') + address.replace(' ', '-')
    return _GOOGLE_SEARCH_URL + quote(query, safe='')


def _fetch_soup(url):
    """Load ``url`` in a fresh headless Chrome and return the parsed page.

    The browser is always shut down, even when loading the page fails, so a
    failing row does not leave a Chrome process behind.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        time.sleep(_PAGE_LOAD_DELAY_S)
        page_source = driver.page_source
    finally:
        driver.quit()
    return BeautifulSoup(page_source, features='html.parser')


# Google scraping for restaurants in Le Fooding
def google_scrap_fooding(df):
    """Add a 'Price Range' column scraped from Google search results.

    For every row, searches Google for the 'Name' and 'Address' values and
    extracts a price range from the <span> texts containing both '€' and '–'.
    Extracted values of 8 characters or more are replaced by ''.

    Args:
        df: table with 'Name' and 'Address' columns; modified in place.

    Returns:
        The same ``df`` with the 'Price Range' column set.
    """
    price_ranges = []
    for name, address in zip(df['Name'], df['Address']):
        ic()
        ic(name)
        soup = _fetch_soup(_build_search_url(str(name), str(address)))
        span_texts = [span.text for span in soup.find_all('span')]
        candidates = ''.join(
            text for text in span_texts if '€' in text and '–' in text
        )
        # Keep what follows the last '...', drop its first character, and
        # cut at the first '€'.
        price_range = candidates.split('...')[-1][1:].split('€')[0]
        ic(price_range)
        # Long strings are treated as a failed extraction.
        price_ranges.append(price_range if len(price_range) < 8 else '')
    df['Price Range'] = price_ranges
    return df


# Google scraping for restaurants in Petit Fute
def get_price_range_fute(soup):
    """Extract a price range string from a parsed Google result page.

    First looks at the <span> texts containing 'Reported by': keeps what
    precedes the first 'R' and, within that, what follows the last '€'.
    If that yields '', falls back to the <span> texts containing
    '· Prix. de': keeps what precedes the first '€' and, within that, what
    follows the marker.

    Args:
        soup: parsed page exposing ``find_all('span')``.

    Returns:
        The extracted string; '' when nothing matched.
    """
    span_texts = [span.text for span in soup.find_all('span')]
    reported = ''.join(text for text in span_texts if 'Reported by' in text)
    price_range = reported.strip().split('R')[0].split('€')[-1]
    if price_range == '':
        # NOTE(review): the original split marker contained a stray
        # line-break character after 'Prix. ' and so differed from the marker
        # used to select the spans; both now use the same text -- confirm
        # against a live result page.
        marker = '· Prix. de'
        with_marker = ''.join(text for text in span_texts if marker in text)
        price_range = with_marker.split('€')[0].split(marker)[-1].strip()
    return price_range


def get_mean_price_fute(soup):
    """Extract the mean price string from a parsed Google result page.

    Joins the <span> texts containing
    '(Les prix ont été fournis par le restaurant)', keeps what precedes the
    first '€' and, within that, what follows the last 'Price range: '.

    Args:
        soup: parsed page exposing ``find_all('span')``.

    Returns:
        The extracted string; '' when no span matched.
    """
    matching = [
        span.text
        for span in soup.find_all('span')
        if '(Les prix ont été fournis par le restaurant)' in span.text
    ]
    return ''.join(matching).split('€')[0].split('Price range: ')[-1].strip()


def google_scrap_fute(df):
    """Add 'Price Range' and 'Mean Price' columns scraped from Google.

    For every row, searches Google for the 'Name' and 'Address' values and
    parses the result page with ``get_price_range_fute`` and
    ``get_mean_price_fute``. When an ``AttributeError`` (e.g. a non-string
    name or address) or a ``ConnectionError`` occurs for a row, both values
    are recorded as 'NA' and the scrape continues.

    Args:
        df: table with 'Name' and 'Address' columns; modified in place.

    Returns:
        The same ``df`` with the two columns set.
    """
    price_ranges = []
    mean_prices = []
    ic()
    for name, address in zip(df['Name'], df['Address']):
        ic()
        ic(name)
        try:
            # The URL is built before any browser is started, so a bad
            # name/address cannot leave a Chrome process running.
            soup = _fetch_soup(_build_search_url(name, address))
            price_range = get_price_range_fute(soup)
            ic(price_range)
            mean_price = get_mean_price_fute(soup)
            ic(mean_price)
        except (AttributeError, ConnectionError):
            ic()
            price_range = mean_price = 'NA'
        price_ranges.append(price_range)
        mean_prices.append(mean_price)
    df['Price Range'] = price_ranges
    df['Mean Price'] = mean_prices
    return df