Merge pull request 'generalization' (#5) from generalization into main
Reviewed-on: #5
This commit is contained in:
commit
282d6cd8a5
|
@ -6,6 +6,7 @@ import os
|
||||||
import s3fs
|
import s3fs
|
||||||
import re
|
import re
|
||||||
import warnings
|
import warnings
|
||||||
|
from datetime import date, timedelta, datetime
|
||||||
|
|
||||||
# Create filesystem object
|
# Create filesystem object
|
||||||
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
|
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
|
||||||
|
@ -18,6 +19,47 @@ exec(open('0_KPI_functions.py').read())
|
||||||
# Ignore warning
|
# Ignore warning
|
||||||
warnings.filterwarnings('ignore')
|
warnings.filterwarnings('ignore')
|
||||||
|
|
||||||
|
|
||||||
|
def display_covering_time(df, company, datecover):
    """
    Record the daily time coverage of one company and print its bounds.

    The coverage is the list of consecutive days (naive datetimes at
    midnight) starting on the day of the earliest 'purchase_date' and
    stopping one day BEFORE the day of the latest 'purchase_date'.

    Parameters
    ----------
    df : table with a 'purchase_date' column whose minimum and maximum
        support ``strftime`` (i.e. datetime-like values).
    company : key under which the coverage is stored in ``datecover``;
        it is also shown in the printed message.
    datecover : dict mapping company -> list of covered days. It is
        updated in place.

    Returns
    -------
    The same ``datecover`` dict, with the entry for ``company`` set.

    Raises
    ------
    ValueError
        If 'purchase_date' contains no valid date.
    """
    purchase_dates = df['purchase_date']

    # Without this guard the failure would surface as an opaque
    # "NaTType does not support strftime" error.
    if not purchase_dates.notna().any():
        raise ValueError(f"No valid purchase_date found for company {company}")

    min_date = purchase_dates.min().strftime("%Y-%m-%d")
    max_date = purchase_dates.max().strftime("%Y-%m-%d")

    # Round-trip through the "%Y-%m-%d" string to drop the time of day (and
    # any timezone), so that days compare equal across companies.
    first_day = datetime.strptime(min_date, "%Y-%m-%d")
    last_day = datetime.strptime(max_date, "%Y-%m-%d")

    # NOTE(review): the range is half-open, so the day of the last purchase
    # is not part of the coverage -- confirm this is intended.
    number_of_days = (last_day - first_day).days
    datecover[company] = [first_day + timedelta(days=offset) for offset in range(number_of_days)]

    print(f'Couverture Company {company} : {min_date} - {max_date}')
    return datecover
|
||||||
|
|
||||||
|
|
||||||
|
def compute_time_intersection(datecover):
    """
    Return the days that are covered by every company.

    Parameters
    ----------
    datecover : dict mapping company -> iterable of hashable objects that
        support ``strftime`` (e.g. datetimes).

    Returns
    -------
    Sorted list of "%Y-%m-%d" strings, one per element present in all the
    coverages. The list is empty when ``datecover`` is empty or when the
    companies share no day.
    """
    # set.intersection() needs at least one set: without this guard an empty
    # mapping raises a TypeError instead of yielding "nothing in common".
    if not datecover:
        return []

    coverage_sets = [set(days) for days in datecover.values()]
    common_days = set.intersection(*coverage_sets)

    # ISO-formatted dates sort chronologically when sorted as strings.
    return sorted(day.strftime("%Y-%m-%d") for day in common_days)
|
||||||
|
|
||||||
|
|
||||||
|
def df_coverage_modelization(sport, coverage_train = 0.7):
    """
    Compute the dates used to build the train and test datasets.

    For every company in ``sport`` the "products_purchased_reduced" table is
    loaded with ``display_databases``, its time coverage is recorded with
    ``display_covering_time``, and the days shared by all companies are
    obtained with ``compute_time_intersection``.

    Parameters
    ----------
    sport : iterable of company identifiers passed to ``display_databases``.
    coverage_train : fraction (between 0 and 1) of the common coverage that
        precedes ``end_of_features``. Defaults to 0.7.

    Returns
    -------
    (start_date, end_of_features, final_date) : the first common day, the
    day located at ``coverage_train`` of the common coverage, and the last
    common day, in the format returned by ``compute_time_intersection``.

    Raises
    ------
    ValueError
        If ``coverage_train`` is not between 0 and 1.
    IndexError
        If the companies have no day in common.
    """
    if not 0 <= coverage_train <= 1:
        raise ValueError(f"coverage_train must be between 0 and 1, got {coverage_train}")

    datecover = {}
    for company in sport:
        df_products_purchased_reduced = display_databases(company, file_name = "products_purchased_reduced",
                                                          datetime_col = ['purchase_date'])
        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)

    dt_coverage = compute_time_intersection(datecover)

    # Same exception type as the bare dt_coverage[0] lookup would raise, but
    # with a message that names the actual problem.
    if not dt_coverage:
        raise IndexError("The companies have no common time coverage")

    # Honour the coverage_train argument (it used to be ignored in favour of
    # a hard-coded 0.7) and keep the index valid when coverage_train is 1.
    split_index = min(int(coverage_train * len(dt_coverage)), len(dt_coverage) - 1)

    start_date = dt_coverage[0]
    end_of_features = dt_coverage[split_index]
    final_date = dt_coverage[-1]
    return start_date, end_of_features, final_date
|
||||||
|
|
||||||
|
|
||||||
def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
||||||
|
|
||||||
# Import customerplus
|
# Import customerplus
|
||||||
|
@ -97,14 +139,25 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
||||||
|
|
||||||
## Exportation
|
## Exportation
|
||||||
|
|
||||||
# Dossier d'exportation
|
companies = {'musee' : ['1', '2', '3', '4', '101'],
|
||||||
BUCKET_OUT = "projet-bdc2324-team1/2_Output/Logistique Regression databases - First approach"
|
'sport': ['5', '6', '7', '8', '9'],
|
||||||
|
'musique' : ['10', '11', '12', '13', '14']}
|
||||||
|
|
||||||
# Dataset test
|
type_of_comp = input('Choisissez le type de compagnie : sport ? musique ? musee ?')
|
||||||
dataset_test = dataset_construction(min_date = "2021-08-01", end_features_date = "2023-08-01", max_date = "2023-11-01", directory_path = "1")
|
list_of_comp = companies[type_of_comp]
|
||||||
|
# Dossier d'exportation
|
||||||
|
BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'
|
||||||
|
|
||||||
|
# Create test dataset and train dataset for sport companies
|
||||||
|
|
||||||
|
start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)
|
||||||
|
|
||||||
|
for company in list_of_comp:
|
||||||
|
dataset_test = dataset_construction(min_date = start_date, end_features_date = end_of_features,
|
||||||
|
max_date = final_date, directory_path = company)
|
||||||
|
|
||||||
# Exportation
|
# Exportation
|
||||||
FILE_KEY_OUT_S3 = "dataset_test.csv"
|
FILE_KEY_OUT_S3 = "dataset_test" + company + ".csv"
|
||||||
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3
|
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3
|
||||||
|
|
||||||
with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
|
with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
|
||||||
|
@ -113,10 +166,10 @@ with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
|
||||||
print("Exportation dataset test : SUCCESS")
|
print("Exportation dataset test : SUCCESS")
|
||||||
|
|
||||||
# Dataset train
|
# Dataset train
|
||||||
dataset_train = dataset_construction(min_date = "2021-05-01", end_features_date = "2023-05-01", max_date = "2023-08-01", directory_path = "1")
|
dataset_train = dataset_construction(min_date = start_date, end_features_date = end_of_features,
|
||||||
|
max_date = final_date, directory_path = company)
|
||||||
# Export
|
# Export
|
||||||
FILE_KEY_OUT_S3 = "dataset_train.csv"
|
FILE_KEY_OUT_S3 = "dataset_train" + company + ".csv"
|
||||||
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3
|
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3
|
||||||
|
|
||||||
with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
|
with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
|
||||||
|
|
|
@ -46,7 +46,7 @@ def tickets_kpi_function(tickets_information = None):
|
||||||
|
|
||||||
# Dummy : Canal de vente en ligne
|
# Dummy : Canal de vente en ligne
|
||||||
liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance
|
liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance
|
||||||
tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].str.contains('|'.join(liste_mots), case=False).astype(int)
|
tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].fillna('').str.contains('|'.join(liste_mots), case=False).astype(int)
|
||||||
|
|
||||||
# Proportion de vente en ligne
|
# Proportion de vente en ligne
|
||||||
prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id'])['ticket_id'].count().reset_index()
|
prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id'])['ticket_id'].count().reset_index()
|
||||||
|
|
854
Sport/exploration_sport.ipynb
Normal file
854
Sport/exploration_sport.ipynb
Normal file
|
@ -0,0 +1,854 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 114,
|
||||||
|
"id": "314bf34b-1f6d-4a99-8f82-aa71ebacdabc",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import os\n",
|
||||||
|
"import s3fs\n",
|
||||||
|
"import warnings\n",
|
||||||
|
"from datetime import date, timedelta, datetime\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"\n",
|
||||||
|
"exec(open('../0_KPI_functions.py').read())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 33,
|
||||||
|
"id": "a276822a-c389-429e-b249-8a9e47758bfc",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Ignore warning\n",
|
||||||
|
"warnings.filterwarnings('ignore')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 34,
|
||||||
|
"id": "f62b996c-4e17-40ea-83ba-f0cb60be7671",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['bdc2324-data/1',\n",
|
||||||
|
" 'bdc2324-data/10',\n",
|
||||||
|
" 'bdc2324-data/101',\n",
|
||||||
|
" 'bdc2324-data/11',\n",
|
||||||
|
" 'bdc2324-data/12',\n",
|
||||||
|
" 'bdc2324-data/13',\n",
|
||||||
|
" 'bdc2324-data/14',\n",
|
||||||
|
" 'bdc2324-data/2',\n",
|
||||||
|
" 'bdc2324-data/3',\n",
|
||||||
|
" 'bdc2324-data/4',\n",
|
||||||
|
" 'bdc2324-data/5',\n",
|
||||||
|
" 'bdc2324-data/6',\n",
|
||||||
|
" 'bdc2324-data/7',\n",
|
||||||
|
" 'bdc2324-data/8',\n",
|
||||||
|
" 'bdc2324-data/9']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 34,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Create filesystem object\n",
|
||||||
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||||
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||||||
|
"\n",
|
||||||
|
"BUCKET = \"bdc2324-data\"\n",
|
||||||
|
"fs.ls(BUCKET)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2c829aa8-2006-4e72-889b-7096dd55718b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Look at the time coverage of each company and compute the period common to all of them"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 73,
|
||||||
|
"id": "e86864b7-4852-449a-8680-638559d56080",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sport = ['5', '6', '7', '8', '9']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 90,
|
||||||
|
"id": "7634ec57-4891-4684-8638-1e1643baca28",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def display_covering_time(df, company, datecover):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function draws the time coverage of each company\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
|
||||||
|
" max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
|
||||||
|
" datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
|
||||||
|
" print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
|
||||||
|
" return datecover"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 91,
|
||||||
|
"id": "53c83f51-822c-4e05-8c7c-89aa327603c6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def compute_time_intersection(datecover):\n",
|
||||||
|
" timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
|
||||||
|
" intersection = set.intersection(*timestamps_sets)\n",
|
||||||
|
" intersection_list = list(intersection)\n",
|
||||||
|
" formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
|
||||||
|
" return sorted(formated_dates)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 93,
|
||||||
|
"id": "eec152de-078e-44c4-ad6e-74ae6ba5c65a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def df_coverage_modelization(sport, coverage_train = 0.7):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function returns start_date, end_of_features and final dates\n",
|
||||||
|
" that help to construct train and test datasets\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" datecover = {}\n",
|
||||||
|
" for company in sport:\n",
|
||||||
|
" df_products_purchased_reduced = display_databases(company, file_name = \"products_purchased_reduced\",\n",
|
||||||
|
" datetime_col = ['purchase_date'])\n",
|
||||||
|
" datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
|
||||||
|
" #print(datecover.keys())\n",
|
||||||
|
" dt_coverage = compute_time_intersection(datecover)\n",
|
||||||
|
" start_date = dt_coverage[0]\n",
|
||||||
|
" end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n",
|
||||||
|
" final_date = dt_coverage[-1]\n",
|
||||||
|
" return start_date, end_of_features, final_date\n",
|
||||||
|
" "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 94,
|
||||||
|
"id": "348f246a-bc2d-4bbc-ba05-aa825da15a69",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
|
||||||
|
"Couverture Company 5 : 2019-04-15 - 2023-11-09\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n",
|
||||||
|
"Couverture Company 6 : 2018-06-28 - 2023-11-08\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_7/products_purchased_reduced.csv\n",
|
||||||
|
"Couverture Company 7 : 2015-02-10 - 2023-11-08\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_8/products_purchased_reduced.csv\n",
|
||||||
|
"Couverture Company 8 : 2010-09-28 - 2023-11-08\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_9/products_purchased_reduced.csv\n",
|
||||||
|
"Couverture Company 9 : 2014-09-22 - 2023-10-24\n",
|
||||||
|
"dict_keys(['5', '6', '7', '8', '9'])\n",
|
||||||
|
"2019-04-15 2022-06-15 2023-10-23\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"start_date, end_of_features, final_date = df_coverage_modelization(sport, coverage_train = 0.7)\n",
|
||||||
|
"print(start_date, end_of_features, final_date )"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "34ddc267-4daa-4926-9d54-5b13d4212eaa",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Look at the databases common to all Sport companies"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 101,
|
||||||
|
"id": "389387fa-2046-4811-b8dd-6d524e91fe2e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['bdc2324-data/5',\n",
|
||||||
|
" 'bdc2324-data/6',\n",
|
||||||
|
" 'bdc2324-data/7',\n",
|
||||||
|
" 'bdc2324-data/8',\n",
|
||||||
|
" 'bdc2324-data/9']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 101,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"companies = fs.ls(BUCKET)\n",
|
||||||
|
"companies = [company for company in companies if any(company.endswith(end) for end in sport)]\n",
|
||||||
|
"companies"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 107,
|
||||||
|
"id": "895fc2b3-c768-454d-bedb-54994e4d211a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Number of databases : 30\n",
|
||||||
|
"Number of common databases : 23\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"companies_database = {}\n",
|
||||||
|
"\n",
|
||||||
|
"for company in companies:\n",
|
||||||
|
" companies_database[company.split('/')[-1]] = [file.split('/')[-1].replace(company.split('/')[-1], '') for file in fs.ls(company)] \n",
|
||||||
|
"\n",
|
||||||
|
"all_database = companies_database[max(companies_database, key=lambda x: len(companies_database[x]))]\n",
|
||||||
|
"print(\"Number of databases : \",len(all_database))\n",
|
||||||
|
"\n",
|
||||||
|
"data_in_common = set(all_database)\n",
|
||||||
|
"\n",
|
||||||
|
"for key in companies_database:\n",
|
||||||
|
" diff_database = data_in_common.symmetric_difference(companies_database[key])\n",
|
||||||
|
" data_in_common = data_in_common - diff_database\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Number of common databases : \",len(data_in_common))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 121,
|
||||||
|
"id": "0c06517d-f5b7-4104-94fa-0e3f843c5881",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'campaign_stats.csv',\n",
|
||||||
|
" 'campaigns.csv',\n",
|
||||||
|
" 'categories.csv',\n",
|
||||||
|
" 'countries.csv',\n",
|
||||||
|
" 'currencies.csv',\n",
|
||||||
|
" 'customer_target_mappings.csv',\n",
|
||||||
|
" 'customersplus.csv',\n",
|
||||||
|
" 'event_types.csv',\n",
|
||||||
|
" 'events.csv',\n",
|
||||||
|
" 'facilities.csv',\n",
|
||||||
|
" 'link_stats.csv',\n",
|
||||||
|
" 'pricing_formulas.csv',\n",
|
||||||
|
" 'product_packs.csv',\n",
|
||||||
|
" 'products.csv',\n",
|
||||||
|
" 'products_groups.csv',\n",
|
||||||
|
" 'purchases.csv',\n",
|
||||||
|
" 'representation_category_capacities.csv',\n",
|
||||||
|
" 'representations.csv',\n",
|
||||||
|
" 'seasons.csv',\n",
|
||||||
|
" 'suppliers.csv',\n",
|
||||||
|
" 'target_types.csv',\n",
|
||||||
|
" 'targets.csv',\n",
|
||||||
|
" 'tickets.csv'}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 121,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"data_in_common"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "1af245aa-44a7-453b-90f9-0c4bcc415cd0",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Investigate errors from data construction for company 6"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 108,
|
||||||
|
"id": "538a5ca2-a50d-4726-93eb-c2b0d0ab8400",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"directory_path = '6'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 143,
|
||||||
|
"id": "1ca3fb71-930a-441c-b35b-b98bca780606",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/customerplus_cleaned.csv\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n",
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_customerplus_clean = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
|
||||||
|
"df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
|
||||||
|
"df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 144,
|
||||||
|
"id": "2ad3052c-e9e6-4ef9-abe2-4b8b2306a2b9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"max_date = pd.to_datetime(final_date, utc = True, format = 'ISO8601') \n",
|
||||||
|
"end_features_date = pd.to_datetime(end_of_features, utc = True, format = 'ISO8601')\n",
|
||||||
|
"min_date = pd.to_datetime(start_date, utc = True, format = 'ISO8601')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 128,
|
||||||
|
"id": "146999f2-ab92-4b7c-8c57-2e3ac8c4dd88",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 133,
|
||||||
|
"id": "7448a7b9-3edf-4177-9df2-a260ebbee45e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Timestamp('2022-06-15 00:00:00+0000', tz='UTC')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 133,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"end_features_date"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 136,
|
||||||
|
"id": "d8e954ab-65d4-4f36-8410-69bf664773a7",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Shape campaigns_information : (1333010, 8)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>id</th>\n",
|
||||||
|
" <th>customer_id</th>\n",
|
||||||
|
" <th>opened_at</th>\n",
|
||||||
|
" <th>sent_at</th>\n",
|
||||||
|
" <th>delivered_at</th>\n",
|
||||||
|
" <th>campaign_name</th>\n",
|
||||||
|
" <th>campaign_service_id</th>\n",
|
||||||
|
" <th>campaign_sent_at</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>38</td>\n",
|
||||||
|
" <td>NaT</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:33+00:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Adhérents non ré-engagés</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>26135</td>\n",
|
||||||
|
" <td>NaT</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:34+00:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Adhérents non ré-engagés</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3876</td>\n",
|
||||||
|
" <td>NaT</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:35+00:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Adhérents non ré-engagés</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>26226</td>\n",
|
||||||
|
" <td>NaT</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:35+00:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Adhérents non ré-engagés</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>25349</td>\n",
|
||||||
|
" <td>NaT</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:34+00:00</td>\n",
|
||||||
|
" <td>NaN</td>\n",
|
||||||
|
" <td>Adhérents non ré-engagés</td>\n",
|
||||||
|
" <td>15</td>\n",
|
||||||
|
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" id customer_id opened_at sent_at delivered_at \\\n",
|
||||||
|
"0 1 38 NaT 2022-08-02 18:31:33+00:00 NaN \n",
|
||||||
|
"1 2 26135 NaT 2022-08-02 18:31:34+00:00 NaN \n",
|
||||||
|
"2 3 3876 NaT 2022-08-02 18:31:35+00:00 NaN \n",
|
||||||
|
"3 4 26226 NaT 2022-08-02 18:31:35+00:00 NaN \n",
|
||||||
|
"4 5 25349 NaT 2022-08-02 18:31:34+00:00 NaN \n",
|
||||||
|
"\n",
|
||||||
|
" campaign_name campaign_service_id campaign_sent_at \n",
|
||||||
|
"0 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||||||
|
"1 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||||||
|
"2 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||||||
|
"3 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||||||
|
"4 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 136,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(\"Shape campaigns_information : \", df_campaigns_information.shape)\n",
|
||||||
|
"df_campaigns_information.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 134,
|
||||||
|
"id": "93eceaf1-ce4c-4dfa-9c51-4fd016d09fc5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Timestamp('2022-08-02 18:31:33+0000', tz='UTC')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 134,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_campaigns_information['sent_at'].min()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 137,
|
||||||
|
"id": "ea50cab4-1dae-4efe-ae3c-22b6f9ad1d26",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"Timestamp('2023-11-07 10:08:16+0000', tz='UTC')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 137,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_campaigns_information['sent_at'].max()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 127,
|
||||||
|
"id": "dcb87bc9-caf5-4655-9cfa-4a3dad504bac",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>id</th>\n",
|
||||||
|
" <th>customer_id</th>\n",
|
||||||
|
" <th>opened_at</th>\n",
|
||||||
|
" <th>sent_at</th>\n",
|
||||||
|
" <th>delivered_at</th>\n",
|
||||||
|
" <th>campaign_name</th>\n",
|
||||||
|
" <th>campaign_service_id</th>\n",
|
||||||
|
" <th>campaign_sent_at</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"Empty DataFrame\n",
|
||||||
|
"Columns: [id, customer_id, opened_at, sent_at, delivered_at, campaign_name, campaign_service_id, campaign_sent_at]\n",
|
||||||
|
"Index: []"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 127,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#Filtre de la base df_campaigns_information\n",
|
||||||
|
"df_campaigns_information = df_campaigns_information[(df_campaigns_information['sent_at'] <= end_features_date) & (df_campaigns_information['sent_at'] >= min_date)]\n",
|
||||||
|
"df_campaigns_information"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 145,
|
||||||
|
"id": "abe22e09-a041-4349-be8f-b0784f2f0a98",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>ticket_id</th>\n",
|
||||||
|
" <th>customer_id</th>\n",
|
||||||
|
" <th>purchase_id</th>\n",
|
||||||
|
" <th>event_type_id</th>\n",
|
||||||
|
" <th>supplier_name</th>\n",
|
||||||
|
" <th>purchase_date</th>\n",
|
||||||
|
" <th>amount</th>\n",
|
||||||
|
" <th>is_full_price</th>\n",
|
||||||
|
" <th>name_event_types</th>\n",
|
||||||
|
" <th>name_facilities</th>\n",
|
||||||
|
" <th>name_categories</th>\n",
|
||||||
|
" <th>name_events</th>\n",
|
||||||
|
" <th>name_seasons</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>49</th>\n",
|
||||||
|
" <td>91401</td>\n",
|
||||||
|
" <td>108392</td>\n",
|
||||||
|
" <td>1259025.0</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>caisse</td>\n",
|
||||||
|
" <td>2022-02-27 13:44:10.690000+00:00</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>ligue 1 uber eats</td>\n",
|
||||||
|
" <td>stade de l'aube</td>\n",
|
||||||
|
" <td>honneur basse</td>\n",
|
||||||
|
" <td>olympique de marseille</td>\n",
|
||||||
|
" <td>saison 2021-2022</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>117</th>\n",
|
||||||
|
" <td>535527</td>\n",
|
||||||
|
" <td>31304</td>\n",
|
||||||
|
" <td>136629.0</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>adhésion</td>\n",
|
||||||
|
" <td>2022-04-28 15:47:52.790000+00:00</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>ligue 1 uber eats</td>\n",
|
||||||
|
" <td>stade de l'aube</td>\n",
|
||||||
|
" <td>honneur basse</td>\n",
|
||||||
|
" <td>ac ajaccio</td>\n",
|
||||||
|
" <td>saison 2022-2023</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>274</th>\n",
|
||||||
|
" <td>547400</td>\n",
|
||||||
|
" <td>192</td>\n",
|
||||||
|
" <td>140477.0</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>adhésion</td>\n",
|
||||||
|
" <td>2022-04-28 15:47:54.053000+00:00</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>ligue 1 uber eats</td>\n",
|
||||||
|
" <td>stade de l'aube</td>\n",
|
||||||
|
" <td>honneur basse</td>\n",
|
||||||
|
" <td>rc strasbourg</td>\n",
|
||||||
|
" <td>saison 2022-2023</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>304</th>\n",
|
||||||
|
" <td>84413</td>\n",
|
||||||
|
" <td>31388</td>\n",
|
||||||
|
" <td>20259.0</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>adhésion</td>\n",
|
||||||
|
" <td>2021-08-03 13:45:01.603000+00:00</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>ligue 1 uber eats</td>\n",
|
||||||
|
" <td>stade de l'aube</td>\n",
|
||||||
|
" <td>vitoux haute</td>\n",
|
||||||
|
" <td>olympique de marseille</td>\n",
|
||||||
|
" <td>saison 2021-2022</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>311</th>\n",
|
||||||
|
" <td>407271</td>\n",
|
||||||
|
" <td>3265</td>\n",
|
||||||
|
" <td>90527.0</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>web [adhésion]</td>\n",
|
||||||
|
" <td>2022-05-26 09:15:40.993000+00:00</td>\n",
|
||||||
|
" <td>0.0</td>\n",
|
||||||
|
" <td>False</td>\n",
|
||||||
|
" <td>ligue 1 uber eats</td>\n",
|
||||||
|
" <td>stade de l'aube</td>\n",
|
||||||
|
" <td>champagne basse</td>\n",
|
||||||
|
" <td>stade brestois 29</td>\n",
|
||||||
|
" <td>saison 2022-2023</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
||||||
|
"49 91401 108392 1259025.0 4 caisse \n",
|
||||||
|
"117 535527 31304 136629.0 4 adhésion \n",
|
||||||
|
"274 547400 192 140477.0 4 adhésion \n",
|
||||||
|
"304 84413 31388 20259.0 4 adhésion \n",
|
||||||
|
"311 407271 3265 90527.0 4 web [adhésion] \n",
|
||||||
|
"\n",
|
||||||
|
" purchase_date amount is_full_price \\\n",
|
||||||
|
"49 2022-02-27 13:44:10.690000+00:00 0.0 False \n",
|
||||||
|
"117 2022-04-28 15:47:52.790000+00:00 0.0 False \n",
|
||||||
|
"274 2022-04-28 15:47:54.053000+00:00 0.0 False \n",
|
||||||
|
"304 2021-08-03 13:45:01.603000+00:00 0.0 False \n",
|
||||||
|
"311 2022-05-26 09:15:40.993000+00:00 0.0 False \n",
|
||||||
|
"\n",
|
||||||
|
" name_event_types name_facilities name_categories \\\n",
|
||||||
|
"49 ligue 1 uber eats stade de l'aube honneur basse \n",
|
||||||
|
"117 ligue 1 uber eats stade de l'aube honneur basse \n",
|
||||||
|
"274 ligue 1 uber eats stade de l'aube honneur basse \n",
|
||||||
|
"304 ligue 1 uber eats stade de l'aube vitoux haute \n",
|
||||||
|
"311 ligue 1 uber eats stade de l'aube champagne basse \n",
|
||||||
|
"\n",
|
||||||
|
" name_events name_seasons \n",
|
||||||
|
"49 olympique de marseille saison 2021-2022 \n",
|
||||||
|
"117 ac ajaccio saison 2022-2023 \n",
|
||||||
|
"274 rc strasbourg saison 2022-2023 \n",
|
||||||
|
"304 olympique de marseille saison 2021-2022 \n",
|
||||||
|
"311 stade brestois 29 saison 2022-2023 "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 145,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"#Filtre de la base df_products_purchased_reduced\n",
|
||||||
|
"df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]\n",
|
||||||
|
"df_products_purchased_reduced.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 150,
|
||||||
|
"id": "ae7ef3a6-5b42-4a3c-a108-fec9f2ec4d32",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array(['caisse', 'adhésion', 'web [adhésion]', 'web [grand public]',\n",
|
||||||
|
" 'itr ticketmaster', 'itr fnac', nan, 'decathlon', 'boutique web',\n",
|
||||||
|
" 'boutique officielle'], dtype=object)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 150,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"df_products_purchased_reduced[\"supplier_name\"].unique()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 151,
|
||||||
|
"id": "942f58a5-8ed4-4b18-a7a2-bd296447fa6a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# KPI sur le comportement d'achat\n",
|
||||||
|
"tickets_information_copy = df_products_purchased_reduced.copy()\n",
|
||||||
|
"# Dummy : Canal de vente en ligne\n",
|
||||||
|
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n",
|
||||||
|
"tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].fillna('').str.contains('|'.join(liste_mots), case=False).astype(int)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "658b57cd-4fb8-4552-a582-972144b2af1c",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"tickets_information_copy['vente_internet'] corrected by handling na"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f086a8dc-69ab-4cf3-b25e-379d7da02f43",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user