82 lines
2.8 KiB
Python
82 lines
2.8 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
import os
|
|
import io
|
|
import s3fs
|
|
import re
|
|
import warnings
|
|
from datetime import date, timedelta, datetime
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.dates as mdates
|
|
import seaborn as sns
|
|
|
|
|
|
|
|
# Silence every warning for the rest of the run so library noise does not
# clutter the output. NOTE(review): this hides deprecation notices as well.
warnings.filterwarnings('ignore')

# The helper scripts are exec'd (not imported) so that everything they define
# lands directly in this script's global namespace.
# NOTE(review): presumably the helpers rely on globals created in this file
# (the imports above, `fs`) -- confirm before replacing exec with real imports.
for _helper_path in ('utils_features_construction.py', 'utils_stat_desc.py'):
    # Context manager closes the file handle; Python source is UTF-8 by default.
    with open(_helper_path, encoding='utf-8') as _helper_file:
        # Compiling with the real filename makes tracebacks point at the helper.
        exec(compile(_helper_file.read(), _helper_path, 'exec'))
|
|
|
|
# Build the S3 filesystem handle from the endpoint configured in the
# environment (a missing AWS_S3_ENDPOINT raises KeyError).
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"
fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))
|
|
|
|
# Company identifiers grouped by type of activity.
# Company '101' is not part of the 'musee' group (it was commented out).
companies = dict(
    musee=['1', '2', '3', '4'],
    sport=['5', '6', '7', '8', '9'],
    musique=['10', '11', '12', '13', '14'],
)
|
|
|
|
|
|
# Run the descriptive-statistics pipeline once per activity type, in the order
# the types are declared in `companies` (musee, sport, musique).
# To process a single activity instead, restrict this loop to the wanted key.
for type_of_activity, list_of_comp in companies.items():

    # Load files
    customer, campaigns_kpi, campaigns_brut, tickets, products, targets = load_files(list_of_comp)

    # Identify anonymous customers for each company so they can be removed
    # from the datasets
    outlier_list = outlier_detection(tickets, list_of_comp)

    # Identify valid customers (customers who bought tickets or received
    # mails after the starting date)
    customer_valid_list = valid_customer_detection(products, campaigns_brut)

    databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]

    cleaned = []
    for dataset in databases:
        # Remove outliers (in-place column update on the loaded frame)
        dataset['customer_id'] = dataset['customer_id'].apply(
            lambda x: remove_elements(x, outlier_list)
        )
        # Keep only valid customers. Boolean indexing returns a NEW frame, so
        # the result has to be collected: rebinding the loop variable alone
        # would leave the original frames unfiltered. `.copy()` makes the
        # result independent so later column assignments are unambiguous.
        cleaned.append(
            dataset[dataset['customer_id'].isin(customer_valid_list)].copy()
        )
    customer, campaigns_kpi, campaigns_brut, tickets, products = cleaned

    # Identify customers who bought during the target period (the period of y)
    customer_target_period = identify_purchase_during_target_periode(products)
    customer['has_purchased_target_period'] = np.where(
        customer['customer_id'].isin(customer_target_period), 1, 0
    )

    # Generate the graphs; per the original note, the helpers save them to
    # the bucket automatically.
    compute_nb_clients(customer, type_of_activity)

    # maximum_price_paid(customer, type_of_activity)  # currently disabled

    target_proportion(customer, type_of_activity)

    mailing_consent(customer, type_of_activity)

    mailing_consent_by_target(customer, type_of_activity)

    gender_bar(customer, type_of_activity)

    country_bar(customer, type_of_activity)

    lazy_customer_plot(campaigns_kpi, type_of_activity)

    campaigns_effectiveness(customer, type_of_activity)

    sale_dynamics(products, campaigns_brut, type_of_activity)

    tickets_internet(tickets, type_of_activity)

    already_bought_online(tickets, type_of_activity)

    box_plot_price_tickets(tickets, type_of_activity)

    target_description(targets, type_of_activity)