102 lines
3.0 KiB
Python
102 lines
3.0 KiB
Python
import time
from urllib.parse import quote

from bs4 import BeautifulSoup
from icecream import ic
from selenium import webdriver
|
||
|
||
|
||
# Google scraping for restaurants in Le Fooding
|
||
def google_scrap_fooding(df):
    """Add a 'Price Range' column to *df* from Google search result pages.

    For each row, a Google search built from the row's 'Name' and 'Address'
    is loaded in a fresh headless Chrome session, and a price range is
    extracted from the text of the page's ``<span>`` elements.

    Args:
        df: DataFrame with 'Name' and 'Address' columns. It is modified in
            place: a 'Price Range' column is added or overwritten.

    Returns:
        The same DataFrame. 'Price Range' holds the extracted text, or ''
        when the extracted text is 8 characters or longer.

    Raises:
        Any error raised while launching Chrome or loading the page is
        propagated; the browser session is always closed first.
    """
    base_url = 'https://www.google.com/search?hl=en&q='
    options = webdriver.ChromeOptions()
    # The switch needs exactly two dashes, otherwise Chrome ignores it and
    # opens a visible window for every row.
    options.add_argument("--headless")

    price_ranges = []
    for name, address in zip(df['Name'], df['Address']):
        # The query text keeps its original hyphen-joined form; quoting only
        # protects reserved characters ('&', '#', '+', ...) that would
        # otherwise cut the query short.
        query = str(name).replace(' ', '-') + str(address).replace(' ', '-')
        url = base_url + quote(query, safe='')
        ic()
        ic(name)

        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            time.sleep(0.1)
            page_source = driver.page_source
        finally:
            # Always release the browser, even when the page load fails.
            driver.quit()

        soup = BeautifulSoup(page_source, features='html.parser')
        span_texts = [span.text for span in soup.find_all('span')]

        # Keep spans that mention both a euro sign and an en dash, then take
        # what follows the last '...' (minus its first character) up to the
        # next euro sign.
        priced_text = ''.join(
            text for text in span_texts if '€' in text and '–' in text
        )
        price_range = priced_text.split('...')[-1][1:].split('€')[0]
        ic(price_range)

        # Longer strings are treated as "no price range found".
        price_ranges.append(price_range if len(price_range) < 8 else '')

    df['Price Range'] = price_ranges

    return df
|
||
|
||
|
||
def get_price_range_fute(soup):
    """Return the price-range text found in the ``<span>`` elements of *soup*.

    Two layouts are tried in order:

    1. Spans containing 'Reported by': their texts are joined and stripped,
       cut at the first 'R', and the part after the last '€' is returned
       (not stripped).
    2. If that yields '', spans containing '· Prix. de': their texts are
       joined, cut at the first '€', and the stripped part after the last
       '· Prix. de' is returned.

    Args:
        soup: Parsed page exposing ``find_all('span')``; each result must
            have a ``text`` attribute.

    Returns:
        The extracted string, or '' when neither layout matches.
    """
    span_texts = [span.text for span in soup.find_all('span')]

    reported = ''.join(text for text in span_texts if 'Reported by' in text)
    before_first_r = reported.strip().partition('R')[0]
    price_range = before_first_r.rpartition('€')[2]
    if price_range != '':
        return price_range

    # Fallback layout: "... · Prix. de <amount> € ..."
    prix_text = ''.join(text for text in span_texts if '· Prix. de' in text)
    before_euro = prix_text.partition('€')[0]
    return before_euro.rpartition('· Prix. de')[2].strip()
|
||
|
||
|
||
# Google scraping for restaurants in Petit Fute
|
||
def get_mean_price_fute(soup):
    """Return the mean-price text found in the ``<span>`` elements of *soup*.

    Only spans whose text contains
    '(Les prix ont été fournis par le restaurant)' are considered. Their
    texts are joined, cut at the first '€', and the stripped part after the
    last 'Price range: ' is returned.

    Args:
        soup: Parsed page exposing ``find_all('span')``; each result must
            have a ``text`` attribute.

    Returns:
        The extracted string, or '' when no span carries the marker.
    """
    marker = '(Les prix ont été fournis par le restaurant)'
    joined = ''.join(
        span.text for span in soup.find_all('span') if marker in span.text
    )
    before_euro = joined.partition('€')[0]
    return before_euro.rpartition('Price range: ')[2].strip()
|
||
|
||
|
||
def google_scrap_fute(df):
    """Add 'Price Range' and 'Mean Price' columns to *df* from Google results.

    For each row, a Google search built from the row's 'Name' and 'Address'
    is loaded in a fresh headless Chrome session; the page is parsed and
    passed to ``get_price_range_fute`` and ``get_mean_price_fute``.

    Args:
        df: DataFrame with 'Name' and 'Address' columns. It is modified in
            place: 'Price Range' and 'Mean Price' are added or overwritten.

    Returns:
        The same DataFrame. Both columns hold 'NA' for rows where an
        ``AttributeError`` (e.g. a non-string name or address) or a
        ``ConnectionError`` occurred.
    """
    base_url = 'https://www.google.com/search?hl=en&q='
    options = webdriver.ChromeOptions()
    # The switch needs exactly two dashes, otherwise Chrome ignores it and
    # opens a visible window for every row.
    options.add_argument("--headless")

    price_ranges = []
    mean_prices = []

    ic()
    for name, address in zip(df['Name'], df['Address']):
        ic()
        ic(name)
        try:
            # Build the URL before launching Chrome so a non-string cell
            # fails fast without starting a browser. Quoting protects
            # reserved characters ('&', '#', '+', ...) in the query.
            query = name.replace(' ', '-') + address.replace(' ', '-')
            url = base_url + quote(query, safe='')

            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                time.sleep(0.1)
                page_source = driver.page_source
            finally:
                # Always release the browser, even when the page load fails.
                driver.quit()

            soup = BeautifulSoup(page_source, features='html.parser')

            price_range = get_price_range_fute(soup)
            ic(price_range)
            mean_price = get_mean_price_fute(soup)
            ic(mean_price)

        # A tuple is required here: `except A or B` only ever catches A.
        # NOTE(review): Selenium's own WebDriverException is not covered and
        # still propagates - confirm whether it should map to 'NA' as well.
        except (AttributeError, ConnectionError):
            ic()
            price_range = 'NA'
            mean_price = 'NA'

        # Append once per row so both lists always match the frame's length.
        price_ranges.append(price_range)
        mean_prices.append(mean_price)

    df['Price Range'] = price_ranges
    df['Mean Price'] = mean_prices

    return df
|