This commit is contained in:
Alexis REVELLE 2024-03-14 21:14:40 +00:00
parent 54fbad0344
commit db6eaaaa8d
3 changed files with 141 additions and 3 deletions

View File

@ -0,0 +1,103 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "718d4e6d-b90a-4955-90ee-c1518246c07c",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Choisissez le type de compagnie : sport ? musique ? musee ? sport\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"import warnings\n",
"\n",
"# Silence library warnings to keep the notebook output readable\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# Run the helper scripts in the notebook namespace.\n",
"# `with` closes each file as soon as its content has been read.\n",
"for script_path in ('../0_KPI_functions.py', 'plot.py'):\n",
"    with open(script_path, encoding='utf-8') as script_file:\n",
"        exec(script_file.read())\n",
"\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"# Company identifiers grouped by type of activity\n",
"companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
"             'sport': ['5', '6'],\n",
"             'musique' : ['10', '11', '12', '13', '14']}\n",
"\n",
"\n",
"# Normalise the answer so stray spaces or capitals do not end in a bare KeyError\n",
"type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?').strip().lower()\n",
"if type_of_activity not in companies:\n",
"    raise ValueError(f'Unknown type of activity {type_of_activity!r}; expected one of {sorted(companies)}')\n",
"list_of_comp = companies[type_of_activity]\n",
"\n",
"# Load files\n",
"customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b967f70a-e3ae-423e-9fb0-edfc00ddf826",
"metadata": {},
"outputs": [],
"source": [
"# Identify anonymous customers for each company so they can be removed from our datasets\n",
"outlier_list = outlier_detection(tickets, list_of_comp)\n",
"\n",
"# Identify valid customers (customers who bought tickets or received mails after the starting date)\n",
"customer_valid_list = valid_customer_detection(products, campaigns_brut)\n",
"\n",
"# Identify customers who bought during the target period (the period used to build y)\n",
"consumer_target_period = identify_purchase_during_target_periode(products)\n",
"\n",
"databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]\n",
"\n",
"# Sets make the repeated membership tests below cheap\n",
"valid_customers = set(customer_valid_list)\n",
"target_period_customers = set(consumer_target_period)\n",
"\n",
"for dataset in databases:\n",
"    # Remove outliers (remove_elements is a helper defined outside this notebook)\n",
"    dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))\n",
"    # Keep only valid customers. Rows are dropped in place so that the DataFrames\n",
"    # listed in `databases` are really filtered, and customer_id keeps its ids\n",
"    # instead of being overwritten by a boolean mask.\n",
"    dataset.reset_index(drop=True, inplace=True)  # unique labels: drop() removes exactly the masked rows\n",
"    dataset.drop(index=dataset.index[~dataset['customer_id'].isin(valid_customers)], inplace=True)\n",
"    # Flag the customers who purchased during the target period\n",
"    dataset['has_purchased_target_period'] = np.where(dataset['customer_id'].isin(target_period_customers), 1, 0)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -50,6 +50,8 @@ maximum_price_paid(customer, type_of_activity)
mailing_consent(customer, type_of_activity) mailing_consent(customer, type_of_activity)
mailing_consent_by_target(customer)
#gender_bar(customer, type_of_activity) #gender_bar(customer, type_of_activity)
#country_bar(customer, type_of_activity) #country_bar(customer, type_of_activity)

View File

@ -70,7 +70,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
total_amount_share_index = total_amount_share.set_index('customer_id') total_amount_share_index = total_amount_share.set_index('customer_id')
df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False) df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)
print('df circulaire : ', df_circulaire.head()) #print('df circulaire : ', df_circulaire.head())
top = df_circulaire[:1] top = df_circulaire[:1]
print('top : ', top) print('top : ', top)
outlier_list.append(top.index[0]) outlier_list.append(top.index[0])
@ -101,7 +101,7 @@ def valid_customer_detection(products, campaigns_brut):
def identify_purchase_during_target_periode(products, start_date="2022-11-01", end_date="2023-11-01"):
    """Return the customer ids of the purchases made during the target period.

    Parameters
    ----------
    products : pandas.DataFrame
        Must contain the columns 'purchase_date' and 'customer_id'.
    start_date, end_date : str
        Inclusive bounds compared against 'purchase_date'. The defaults are
        the period that used to be hard-coded in this function.

    Returns
    -------
    list
        One customer_id per matching row, in row order; a customer with
        several purchases in the period appears several times.
    """
    # Each comparison is parenthesised on purpose: `&` binds tighter than
    # `>=` / `<=`, so the unparenthesised form does not build the intended mask.
    in_target_period = (products['purchase_date'] >= start_date) & (products['purchase_date'] <= end_date)
    return products.loc[in_target_period, 'customer_id'].to_list()
@ -140,13 +140,46 @@ def mailing_consent(customer, type_of_activity):
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"]) plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
plt.xlabel('Company') plt.xlabel('Company')
plt.ylabel('Consent of mailing (%)') plt.ylabel('Company')
plt.title(f'Consent of mailing for {type_of_activity}') plt.title(f'Consent of mailing for {type_of_activity}')
plt.show() plt.show()
save_file_s3("mailing_consent_", type_of_activity) save_file_s3("mailing_consent_", type_of_activity)
def mailing_consent_by_target(customer, type_of_activity=None):
    """Plot, per company, the mailing opt-in rate split by target-period purchase.

    Draws one group of bars per company, with one bar per value of
    "has_purchased_target_period", shows the figure and then calls
    save_file_s3 with the "mailing_consent_target_" prefix.

    Parameters
    ----------
    customer : pandas.DataFrame
        Must contain the columns "number_company",
        "has_purchased_target_period" and "opt_in".
    type_of_activity : str, optional
        Label used in the title and passed to save_file_s3. When omitted, the
        module-level ``type_of_activity`` is used, which is what this function
        relied on before the parameter existed.
    """
    if type_of_activity is None:
        # Backward compatibility for callers that only pass `customer`.
        type_of_activity = globals()["type_of_activity"]

    # Mean opt-in rate in percent: one row per company, one column per flag value.
    consent_rates = (
        customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"]
        .mean()
        .unstack("has_purchased_target_period")
        * 100
    )
    categories = consent_rates.index
    purchase_flags = consent_rates.columns

    # Grouped bar plot
    fig, ax = plt.subplots(figsize=(10, 6))
    bar_width = 0.35
    base_positions = np.arange(len(categories))

    for group_rank, label in enumerate(purchase_flags):
        label_printed = "purchased" if label else "no purchase"
        # A company with no customer in this group has no rate; draw an empty
        # bar for it instead of failing on the missing value.
        values = consent_rates[label].fillna(0)
        # Shift each group by one bar width so the bars sit side by side.
        ax.bar(base_positions + group_rank * bar_width, values, bar_width, label=label_printed)

    # Labels, ticks and legend
    ax.set_xlabel('Company')
    ax.set_ylabel('Consent of mailing (%)')
    ax.set_title(f'Consent of mailing according to target for {type_of_activity}')
    # Centre each tick under its group of bars.
    ax.set_xticks(base_positions + bar_width * (len(purchase_flags) - 1) / 2)
    ax.set_xticklabels(categories)
    ax.legend()

    # Display the plot
    plt.show()
    save_file_s3("mailing_consent_target_", type_of_activity)
def gender_bar(customer, type_of_activity): def gender_bar(customer, type_of_activity):
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index() company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()