debug
This commit is contained in:
parent
54fbad0344
commit
db6eaaaa8d
103
Descriptive_statistics/debug.ipynb
Normal file
103
Descriptive_statistics/debug.ipynb
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "718d4e6d-b90a-4955-90ee-c1518246c07c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdin",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Choisissez le type de compagnie : sport ? musique ? musee ? sport\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import os\n",
|
||||||
|
"import s3fs\n",
|
||||||
|
"import re\n",
|
||||||
|
"import warnings\n",
|
||||||
|
"\n",
|
||||||
|
"# Ignore warning\n",
|
||||||
|
"warnings.filterwarnings('ignore')\n",
|
||||||
|
"\n",
|
||||||
|
"exec(open('../0_KPI_functions.py').read())\n",
|
||||||
|
"exec(open('plot.py').read())\n",
|
||||||
|
"\n",
|
||||||
|
"# Create filesystem object\n",
|
||||||
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||||
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||||||
|
"\n",
|
||||||
|
"companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
|
||||||
|
" 'sport': ['5', '6'],\n",
|
||||||
|
" 'musique' : ['10', '11', '12', '13', '14']}\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')\n",
|
||||||
|
"list_of_comp = companies[type_of_activity] \n",
|
||||||
|
"\n",
|
||||||
|
"# Load files\n",
|
||||||
|
"customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b967f70a-e3ae-423e-9fb0-edfc00ddf826",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Identify anonymous customer for each company and remove them from our datasets\n",
|
||||||
|
"outlier_list = outlier_detection(tickets, list_of_comp)\n",
|
||||||
|
"\n",
|
||||||
|
"# Identify valid customer (customer who bought tickets after starting date or received mails after starting date)\n",
|
||||||
|
"customer_valid_list = valid_customer_detection(products, campaigns_brut)\n",
|
||||||
|
"\n",
|
||||||
|
"# Identify customer who bought during the period of y\n",
|
||||||
|
"consumer_target_period = identify_purchase_during_target_periode(products)\n",
|
||||||
|
"\n",
|
||||||
|
"databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]\n",
|
||||||
|
"\n",
|
||||||
|
"for dataset in databases:\n",
|
||||||
|
" dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))# remove outlier\n",
|
||||||
|
" dataset['customer_id'] = dataset['customer_id'].isin(customer_valid_list) # keep only valid customer\n",
|
||||||
|
" dataset['has_purchased_target_period'] = np.where(dataset['customer_id'].isin(customer_valid_list), 1, 0)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
|
@ -50,6 +50,8 @@ maximum_price_paid(customer, type_of_activity)
|
||||||
|
|
||||||
mailing_consent(customer, type_of_activity)
|
mailing_consent(customer, type_of_activity)
|
||||||
|
|
||||||
|
mailing_consent_by_target(customer)
|
||||||
|
|
||||||
#gender_bar(customer, type_of_activity)
|
#gender_bar(customer, type_of_activity)
|
||||||
|
|
||||||
#country_bar(customer, type_of_activity)
|
#country_bar(customer, type_of_activity)
|
||||||
|
|
|
@ -70,7 +70,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
|
||||||
|
|
||||||
total_amount_share_index = total_amount_share.set_index('customer_id')
|
total_amount_share_index = total_amount_share.set_index('customer_id')
|
||||||
df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)
|
df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)
|
||||||
print('df circulaire : ', df_circulaire.head())
|
#print('df circulaire : ', df_circulaire.head())
|
||||||
top = df_circulaire[:1]
|
top = df_circulaire[:1]
|
||||||
print('top : ', top)
|
print('top : ', top)
|
||||||
outlier_list.append(top.index[0])
|
outlier_list.append(top.index[0])
|
||||||
|
@ -101,7 +101,7 @@ def valid_customer_detection(products, campaigns_brut):
|
||||||
|
|
||||||
|
|
||||||
def identify_purchase_during_target_periode(products, start_date="2022-11-01", end_date="2023-11-01"):
    """Return the customer ids attached to purchases made in the target period.

    Parameters
    ----------
    products : pandas.DataFrame
        Must provide a 'purchase_date' column and a 'customer_id' column.
    start_date, end_date : str
        Inclusive bounds of the target period. The defaults are the window
        that was previously hard-coded in this function.

    Returns
    -------
    list
        One 'customer_id' per row of ``products`` whose 'purchase_date' lies
        in the window; a customer with several matching rows appears
        several times.
    """
    purchase_date = products['purchase_date']

    # Each comparison has to be parenthesised (or computed separately as here):
    # `&` binds tighter than `>=` / `<=`, so the unparenthesised form evaluates
    # `"2022-11-01" & products['purchase_date']` first and fails.
    in_target_period = (purchase_date >= start_date) & (purchase_date <= end_date)

    consumer_target_period = products.loc[in_target_period, 'customer_id'].to_list()
    return consumer_target_period
|
||||||
|
|
||||||
|
@ -140,13 +140,46 @@ def mailing_consent(customer, type_of_activity):
|
||||||
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
|
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
|
||||||
|
|
||||||
plt.xlabel('Company')
|
plt.xlabel('Company')
|
||||||
plt.ylabel('Consent of mailing (%)')
|
plt.ylabel('Company')
|
||||||
plt.title(f'Consent of mailing for {type_of_activity}')
|
plt.title(f'Consent of mailing for {type_of_activity}')
|
||||||
|
|
||||||
plt.show()
|
plt.show()
|
||||||
save_file_s3("mailing_consent_", type_of_activity)
|
save_file_s3("mailing_consent_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def mailing_consent_by_target(customer, type_of_activity=None):
    """Draw a grouped bar plot of the mean 'opt_in' per company and target label.

    The ``customer`` frame is grouped by 'number_company' and
    'has_purchased_target_period'; the mean of 'opt_in' in each group is
    multiplied by 100 and drawn as one bar per (company, label) pair.

    Parameters
    ----------
    customer : pandas.DataFrame
        Must provide the 'number_company', 'has_purchased_target_period' and
        'opt_in' columns.
        # assumes 'opt_in' is a 0/1 flag so the mean is a share - TODO confirm
    type_of_activity : str, optional
        Used in the plot title and passed to ``save_file_s3``. When omitted,
        a module-level ``type_of_activity`` is used if one exists, which keeps
        existing ``mailing_consent_by_target(customer)`` calls working.

    Raises
    ------
    ValueError
        If ``type_of_activity`` is neither passed nor defined at module level.
    """
    if type_of_activity is None:
        # Backward compatibility: the function used to read this name as a
        # global without declaring it as a parameter.
        type_of_activity = globals().get("type_of_activity")
        if type_of_activity is None:
            raise ValueError("type_of_activity must be provided")

    df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()

    categories = df_graph["number_company"].unique()
    labels = df_graph["has_purchased_target_period"].unique()
    bar_width = 0.35
    group_positions = np.arange(len(categories))

    # Build the grouped bar plot
    fig, ax = plt.subplots(figsize=(10, 6))

    # One series of bars per target label, each shifted by one bar width
    for group_index, label in enumerate(labels):
        label_data = df_graph[df_graph["has_purchased_target_period"] == label]
        rate_by_company = label_data.set_index("number_company")["opt_in"]

        # A company may have no customer for this label: reindex gives it a
        # zero-height bar instead of failing on an empty selection.
        values = (rate_by_company.reindex(categories, fill_value=0) * 100).to_numpy()

        label_printed = "purchased" if label else "no purchase"
        ax.bar(group_positions + group_index * bar_width, values, bar_width, label=label_printed)

    # Axis labels, title, ticks and legend
    ax.set_xlabel('Company')
    ax.set_ylabel('Consent of mailing (%)')
    ax.set_title(f'Consent of mailing according to target for {type_of_activity}')
    # Centre each tick under its group of bars, whatever the number of labels
    ax.set_xticks(group_positions + bar_width * (len(labels) - 1) / 2)
    ax.set_xticklabels(categories)
    ax.legend()

    # Display the plot
    plt.show()
    # NOTE(review): if save_file_s3 saves the current figure, calling it after
    # plt.show() may store an empty figure on some backends - confirm.
    save_file_s3("mailing_consent_target_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def gender_bar(customer, type_of_activity):
|
def gender_bar(customer, type_of_activity):
|
||||||
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
|
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user