debug
This commit is contained in:
parent
54fbad0344
commit
db6eaaaa8d
103
Descriptive_statistics/debug.ipynb
Normal file
103
Descriptive_statistics/debug.ipynb
Normal file
|
@ -0,0 +1,103 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "718d4e6d-b90a-4955-90ee-c1518246c07c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Choisissez le type de compagnie : sport ? musique ? musee ? sport\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import s3fs\n",
|
||||
"import re\n",
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"# Ignore warning\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"\n",
|
||||
"exec(open('../0_KPI_functions.py').read())\n",
|
||||
"exec(open('plot.py').read())\n",
|
||||
"\n",
|
||||
"# Create filesystem object\n",
|
||||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||||
"\n",
|
||||
"companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
|
||||
" 'sport': ['5', '6'],\n",
|
||||
" 'musique' : ['10', '11', '12', '13', '14']}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')\n",
|
||||
"list_of_comp = companies[type_of_activity] \n",
|
||||
"\n",
|
||||
"# Load files\n",
|
||||
"customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b967f70a-e3ae-423e-9fb0-edfc00ddf826",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Identify anonymous customer for each company and remove them from our datasets\n",
|
||||
"outlier_list = outlier_detection(tickets, list_of_comp)\n",
|
||||
"\n",
|
||||
"# Identify valid customer (customer who bought tickets after starting date or received mails after starting date)\n",
|
||||
"customer_valid_list = valid_customer_detection(products, campaigns_brut)\n",
|
||||
"\n",
|
||||
"# Identify customer who bought during the period of y\n",
|
||||
"consumer_target_period = identify_purchase_during_target_periode(products)\n",
|
||||
"\n",
|
||||
"databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]\n",
|
||||
"\n",
|
||||
"for dataset in databases:\n",
|
||||
" dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))# remove outlier\n",
|
||||
" dataset['customer_id'] = dataset['customer_id'].isin(customer_valid_list) # keep only valid customer\n",
|
||||
" dataset['has_purchased_target_period'] = np.where(dataset['customer_id'].isin(customer_valid_list), 1, 0)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -50,6 +50,8 @@ maximum_price_paid(customer, type_of_activity)
|
|||
|
||||
mailing_consent(customer, type_of_activity)
|
||||
|
||||
mailing_consent_by_target(customer)
|
||||
|
||||
#gender_bar(customer, type_of_activity)
|
||||
|
||||
#country_bar(customer, type_of_activity)
|
||||
|
|
|
@ -70,7 +70,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
|
|||
|
||||
total_amount_share_index = total_amount_share.set_index('customer_id')
|
||||
df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)
|
||||
print('df circulaire : ', df_circulaire.head())
|
||||
#print('df circulaire : ', df_circulaire.head())
|
||||
top = df_circulaire[:1]
|
||||
print('top : ', top)
|
||||
outlier_list.append(top.index[0])
|
||||
|
@ -101,7 +101,7 @@ def valid_customer_detection(products, campaigns_brut):
|
|||
|
||||
|
||||
def identify_purchase_during_target_periode(products, start_date="2022-11-01", end_date="2023-11-01"):
    """Return the customer ids attached to purchases made in the target period.

    Parameters
    ----------
    products : pandas.DataFrame
        Must expose a 'purchase_date' and a 'customer_id' column.
        NOTE(review): 'purchase_date' is assumed to be comparable with
        ISO-formatted date strings (string or datetime column) - confirm.
    start_date, end_date : str
        Inclusive bounds of the target period. The defaults reproduce the
        period that was previously hard-coded.

    Returns
    -------
    list
        One 'customer_id' per matching row, so a customer with several
        purchases in the period appears several times.
    """
    # Each comparison needs its own parentheses: `&` binds tighter than
    # `>=` / `<=`, so the unparenthesised form does not build a boolean mask.
    in_target_period = (
        (products['purchase_date'] >= start_date)
        & (products['purchase_date'] <= end_date)
    )
    products_target_period = products[in_target_period]
    consumer_target_period = products_target_period['customer_id'].to_list()
    return consumer_target_period
|
||||
|
||||
|
@ -140,13 +140,46 @@ def mailing_consent(customer, type_of_activity):
|
|||
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
|
||||
|
||||
plt.xlabel('Company')
|
||||
plt.ylabel('Consent of mailing (%)')
|
||||
plt.ylabel('Company')
|
||||
plt.title(f'Consent of mailing for {type_of_activity}')
|
||||
|
||||
plt.show()
|
||||
save_file_s3("mailing_consent_", type_of_activity)
|
||||
|
||||
|
||||
def mailing_consent_by_target(customer, type_of_activity=None):
    """Plot, per company, the mailing opt-in rate split by target-period purchase.

    Draws one group of bars per 'number_company' and one bar per value of
    'has_purchased_target_period', shows the figure, then calls
    ``save_file_s3`` with the "mailing_consent_target_" prefix.

    Parameters
    ----------
    customer : pandas.DataFrame
        Must expose the 'number_company', 'has_purchased_target_period' and
        'opt_in' columns.
    type_of_activity : str, optional
        Activity name used in the title and passed to ``save_file_s3``. When
        omitted, the module-level ``type_of_activity`` variable is used, which
        is what this function relied on before the parameter existed.

    Raises
    ------
    NameError
        If ``type_of_activity`` is neither passed nor defined at module level.
    """
    if type_of_activity is None:
        # Backward compatibility: existing callers pass only `customer` and
        # expect the activity to come from the surrounding namespace.
        try:
            type_of_activity = globals()["type_of_activity"]
        except KeyError:
            raise NameError("name 'type_of_activity' is not defined") from None

    # Mean opt-in per (company, purchase flag), reshaped to one row per company
    # and one column per flag. A combination absent from the data becomes NaN
    # (no bar drawn) instead of raising on an empty selection.
    consent_rates = (
        customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"]
        .mean()
        .unstack("has_purchased_target_period")
    )
    categories = consent_rates.index
    labels = consent_rates.columns

    # Build the grouped bar plot
    fig, ax = plt.subplots(figsize=(10, 6))

    bar_width = 0.35
    base_positions = np.arange(len(categories))

    # One series of bars per purchase flag, each shifted by one bar width
    for offset, label in enumerate(labels):
        values = consent_rates[label].to_numpy() * 100  # share -> percentage

        label_printed = "purchased" if label else "no purchase"
        ax.bar(base_positions + offset * bar_width, values, bar_width, label=label_printed)

    # Axis labels, title, ticks and legend
    ax.set_xlabel('Company')
    ax.set_ylabel('Consent of mailing (%)')
    ax.set_title(f'Consent of mailing according to target for {type_of_activity}')
    # Centre each tick under its group of bars (bar_width / 2 for two series)
    ax.set_xticks(base_positions + bar_width * (len(labels) - 1) / 2)
    ax.set_xticklabels(categories)
    ax.legend()

    # Display the plot
    plt.show()
    save_file_s3("mailing_consent_target_", type_of_activity)
|
||||
|
||||
|
||||
def gender_bar(customer, type_of_activity):
|
||||
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user