BDC-team-1/Descriptive_statistics/generate_stat_desc.py

69 lines
2.2 KiB
Python
Raw Normal View History

2024-03-14 19:35:03 +01:00
import pandas as pd
import numpy as np
import os
2024-03-15 00:02:50 +01:00
import io
2024-03-14 19:35:03 +01:00
import s3fs
import re
import warnings
# Silence every warning category for the whole run.
warnings.filterwarnings('ignore')

# Execute the helper scripts in this module's namespace so that whatever they
# define is usable further down (presumably load_files, outlier_detection and
# the plotting functions come from here -- confirm against those files).
# The paths are relative, so they are resolved against the current working
# directory at launch time.
# NOTE(review): exec() runs the file contents verbatim; only trusted,
# repository-local files may be listed here.
for _helper_path in ('../0_KPI_functions.py', 'plot.py'):
    # The context manager closes the handle (the bare open(...).read() leaked
    # it), and compile() keeps the helper's file name in tracebacks.
    with open(_helper_path, encoding='utf-8') as _helper_file:
        exec(compile(_helper_file.read(), _helper_path, 'exec'))

# Create filesystem object
# Raises KeyError when the AWS_S3_ENDPOINT environment variable is not set.
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
# Company identifiers grouped by type of activity.
companies = {
    'musee': ['1', '2', '3', '4'],  # , '101'
    'sport': ['5'],
    'musique': ['10', '11', '12', '13', '14'],
}

# Ask which group to analyse. The answer is trimmed and lower-cased so that
# stray whitespace or capital letters do not turn a valid choice into a
# lookup failure.
type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?').strip().lower()
if type_of_activity not in companies:
    # Same exception type as the plain dict lookup raised, but with a message
    # that tells the user which values are accepted.
    raise KeyError(
        f"Unknown type of activity {type_of_activity!r}; "
        f"expected one of: {', '.join(companies)}"
    )
list_of_comp = companies[type_of_activity]
def _filter_customers(dataset, outliers, valid_customers):
    """Clean one dataset's ``customer_id`` column and keep only valid customers.

    Args:
        dataset: DataFrame with a ``customer_id`` column. That column is
            overwritten in place with the result of ``remove_elements``.
        outliers: Forwarded as the second argument of ``remove_elements``.
        valid_customers: Customer ids whose rows are kept.

    Returns:
        A new DataFrame holding only the rows whose cleaned ``customer_id``
        is in ``valid_customers``.
    """
    dataset['customer_id'] = dataset['customer_id'].apply(
        lambda customer_id: remove_elements(customer_id, outliers)
    )
    # Copy so that columns added later land on an independent frame rather
    # than on a slice of the unfiltered data.
    return dataset[dataset['customer_id'].isin(valid_customers)].copy()


# Load files
customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)

# Identify anonymous customer for each company and remove them from our datasets
outlier_list = outlier_detection(tickets, list_of_comp)

# Identify valid customer (customer who bought tickets after starting date or received mails after starting date)
customer_valid_list = valid_customer_detection(products, campaigns_brut)

# Both lists above are computed on the unfiltered data, then applied to every
# dataset. The filtered frames are bound back to the five names: reassigning
# the loop variable inside a `for` loop, as was done before, left the original
# frames unfiltered.
databases = [
    _filter_customers(dataset, outlier_list, customer_valid_list)
    for dataset in (customer, campaigns_kpi, campaigns_brut, tickets, products)
]
customer, campaigns_kpi, campaigns_brut, tickets, products = databases
# Flag the customers returned by identify_purchase_during_target_periode
# (presumably those who bought during the target period y -- confirm in the
# helper): 1 when the customer_id is in that collection, 0 otherwise.
customer_target_period = identify_purchase_during_target_periode(products)
is_target_buyer = customer['customer_id'].isin(customer_target_period)
customer['has_purchased_target_period'] = np.where(is_target_buyer, 1, 0)
# Generate the graphs; each plotting function is expected to save its figure
# in the bucket by itself. The calls run in the order listed, each with
# exactly the arguments given next to it.
# campaigns_effectiveness(customer, type_of_activity) is currently disabled.
plot_jobs = [
    (compute_nb_clients, (customer, type_of_activity)),
    (maximum_price_paid, (customer, type_of_activity)),
    (mailing_consent, (customer, type_of_activity)),
    (mailing_consent_by_target, (customer,)),
    (gender_bar, (customer, type_of_activity)),
    (country_bar, (customer, type_of_activity)),
    (lazy_customer_plot, (campaigns_kpi, type_of_activity)),
    (sale_dynamics, (products, campaigns_brut, type_of_activity)),
    (tickets_internet, (tickets, type_of_activity)),
    (box_plot_price_tickets, (tickets, type_of_activity)),
]
for plot_function, plot_args in plot_jobs:
    plot_function(*plot_args)