"""Generate descriptive-statistics graphs for each type of activity.

For every activity type (museum, sport, music) the script loads the company
datasets, cleans the ``customer_id`` column, keeps only valid customers, flags
customers who purchased during the target period, and then calls the plotting
helpers loaded from ``utils_stat_desc.py``.
"""

import io
import os
import re
import warnings
from datetime import date, timedelta, datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import s3fs
import seaborn as sns

# Ignore warning
warnings.filterwarnings('ignore')

# The helper scripts are executed in this module's namespace (rather than
# imported) so that the functions they define share this script's globals.
# NOTE(review): presumably they rely on names such as `pd`, `np` and `fs`
# defined here -- confirm before converting them to regular imports.
for _helper_script in ('utils_features_construction.py', 'utils_stat_desc.py'):
    # `with` guarantees the file handle is closed once the source is read.
    with open(_helper_script) as _helper_file:
        exec(_helper_file.read())

# Create filesystem object
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# Company identifiers grouped by type of activity.
companies = {
    'musee': ['1', '2', '3', '4'],  # , '101'
    'sport': ['5', '6', '7', '8', '9'],
    'musique': ['10', '11', '12', '13', '14'],
}


def _restrict_to_valid_customers(dataset, outlier_list, customer_valid_list):
    """Clean ``customer_id`` and return only the rows of valid customers.

    The ``customer_id`` column of *dataset* is rewritten in place by passing
    each value through ``remove_elements`` together with *outlier_list*
    (``remove_elements`` comes from the helper scripts; presumably it
    neutralises ids found in the outlier list -- TODO confirm).

    Returns a new, independent DataFrame holding the rows whose
    ``customer_id`` belongs to *customer_valid_list*.
    """
    dataset['customer_id'] = dataset['customer_id'].apply(
        lambda x: remove_elements(x, outlier_list)
    )
    # Copy so that columns added later do not write into a slice of `dataset`.
    return dataset[dataset['customer_id'].isin(customer_valid_list)].copy()


# type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')
for type_of_activity in ['musee', 'sport', 'musique']:
    list_of_comp = companies[type_of_activity]

    # Load files
    customer, campaigns_kpi, campaigns_brut, tickets, products, targets = load_files(list_of_comp)

    # Identify anonymous customer for each company and remove them from our datasets
    outlier_list = outlier_detection(tickets, list_of_comp)

    # Identify valid customer (customer who bought tickets after starting date
    # or received mails after starting date)
    customer_valid_list = valid_customer_detection(products, campaigns_brut)

    # Rebind every dataset to its filtered version. Reassigning only the loop
    # variable (as the previous implementation did) dropped the filtered frame
    # and left the datasets unfiltered.
    customer, campaigns_kpi, campaigns_brut, tickets, products = [
        _restrict_to_valid_customers(dataset, outlier_list, customer_valid_list)
        for dataset in (customer, campaigns_kpi, campaigns_brut, tickets, products)
    ]

    # Identify customer who bought during the period of y
    customer_target_period = identify_purchase_during_target_periode(products)
    customer['has_purchased_target_period'] = np.where(
        customer['customer_id'].isin(customer_target_period), 1, 0
    )

    # Generate graph and automatically saved them in the bucket
    compute_nb_clients(customer, type_of_activity)

    # maximum_price_paid(customer, type_of_activity)

    target_proportion(customer, type_of_activity)

    mailing_consent(customer, type_of_activity)

    mailing_consent_by_target(customer, type_of_activity)

    gender_bar(customer, type_of_activity)

    country_bar(customer, type_of_activity)

    lazy_customer_plot(campaigns_kpi, type_of_activity)

    campaigns_effectiveness(customer, type_of_activity)

    sale_dynamics(products, campaigns_brut, type_of_activity)

    tickets_internet(tickets, type_of_activity)

    already_bought_online(tickets, type_of_activity)

    box_plot_price_tickets(tickets, type_of_activity)

    target_description(targets, type_of_activity)