6593 lines
224 KiB
Plaintext
6593 lines
224 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5bf5c226",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Business Data Challenge - Team 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "b1a5b9d3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import re\n",
|
||
"import warnings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ecfa2219",
|
||
"metadata": {},
|
||
"source": [
|
||
"Configuration de l'accès aux données"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "1a094277",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "30d77451-2df6-4c07-8b15-66e0e990ff03",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"\n",
|
||
"# Import cleaning and merge functions\n",
|
||
"\n",
|
||
"exec(open('0_Cleaning_and_merge_functions.py').read())\n",
|
||
"\n",
|
||
"exec(open('0_KPI_functions.py').read())\n",
|
||
"\n",
|
||
"# Ignore warning\n",
|
||
"warnings.filterwarnings('ignore')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def load_dataset_2(directory_path, file_name):\n",
|
||
" \"\"\"\n",
|
||
" This function loads csv file\n",
|
||
" \"\"\"\n",
|
||
" file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" df = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
" # drop na :\n",
|
||
" #df = df.dropna(axis=1, thresh=len(df))\n",
|
||
" # if identifier in table : delete it\n",
|
||
" if 'identifier' in df.columns:\n",
|
||
" df = df.drop(columns = 'identifier')\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "31ab76f0-fbb1-46f6-b359-97228620c207",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def export_in_temporary(df, output_name):\n",
|
||
" print('Export of dataset :', output_name)\n",
|
||
" FILE_PATH_OUT_S3 = \"projet-bdc2324-team1/Temporary\" + \"/\" + output_name + '.csv'\n",
|
||
" with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
|
||
" df.to_csv(file_out, index = False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ccf597b0-b459-4ea5-baf0-5ba8c90915e4",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Cleaning target area and tags"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "28316e1d-7892-4506-9d53-0695e71aa7bc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1targets.csv\n",
|
||
"Shape : (287, 5)\n",
|
||
"Number of columns : 3\n",
|
||
"Columns : Index(['id', 'target_type_id', 'name'], dtype='object')\n",
|
||
"File path : bdc2324-data/1/1target_types.csv\n",
|
||
"Shape : (4, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'is_import', 'name'], dtype='object')\n",
|
||
"File path : bdc2324-data/1/1customer_target_mappings.csv\n",
|
||
"Shape : (768024, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'customer_id', 'target_id', 'name', 'extra_field'], dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"target_example = preprocessing_target_area('1')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "da467695-ce37-485d-94ab-f1499d56c3a3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>target_type_is_import</th>\n",
|
||
" <th>target_type_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1184824</td>\n",
|
||
" <td>645400</td>\n",
|
||
" <td>DDCP PROMO Réseau livres</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1184825</td>\n",
|
||
" <td>645400</td>\n",
|
||
" <td>Inscrits NL générale site web</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1184828</td>\n",
|
||
" <td>645402</td>\n",
|
||
" <td>DDCP PROMO Art contemporain</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1184829</td>\n",
|
||
" <td>645403</td>\n",
|
||
" <td>DDCP PROMO Art contemporain</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1295770</td>\n",
|
||
" <td>647301</td>\n",
|
||
" <td>Votre première liste</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>768019</th>\n",
|
||
" <td>2737545</td>\n",
|
||
" <td>666983</td>\n",
|
||
" <td>Inscrits NL générale site web</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>768020</th>\n",
|
||
" <td>2737546</td>\n",
|
||
" <td>666983</td>\n",
|
||
" <td>Votre première liste</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>768021</th>\n",
|
||
" <td>2737575</td>\n",
|
||
" <td>666986</td>\n",
|
||
" <td>Votre première liste</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>768022</th>\n",
|
||
" <td>2737576</td>\n",
|
||
" <td>666987</td>\n",
|
||
" <td>Inscrits NL générale site web</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>768023</th>\n",
|
||
" <td>2737577</td>\n",
|
||
" <td>666987</td>\n",
|
||
" <td>Votre première liste</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>768024 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_name \\\n",
|
||
"0 1184824 645400 DDCP PROMO Réseau livres \n",
|
||
"1 1184825 645400 Inscrits NL générale site web \n",
|
||
"2 1184828 645402 DDCP PROMO Art contemporain \n",
|
||
"3 1184829 645403 DDCP PROMO Art contemporain \n",
|
||
"4 1295770 647301 Votre première liste \n",
|
||
"... ... ... ... \n",
|
||
"768019 2737545 666983 Inscrits NL générale site web \n",
|
||
"768020 2737546 666983 Votre première liste \n",
|
||
"768021 2737575 666986 Votre première liste \n",
|
||
"768022 2737576 666987 Inscrits NL générale site web \n",
|
||
"768023 2737577 666987 Votre première liste \n",
|
||
"\n",
|
||
" target_type_is_import target_type_name \n",
|
||
"0 False manual_static_filter \n",
|
||
"1 False manual_static_filter \n",
|
||
"2 False manual_static_filter \n",
|
||
"3 False manual_static_filter \n",
|
||
"4 False manual_static_filter \n",
|
||
"... ... ... \n",
|
||
"768019 False manual_static_filter \n",
|
||
"768020 False manual_static_filter \n",
|
||
"768021 False manual_static_filter \n",
|
||
"768022 False manual_static_filter \n",
|
||
"768023 False manual_static_filter \n",
|
||
"\n",
|
||
"[768024 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"target_example"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "fd88e294-e038-4cec-ad94-2bbbc10a4059",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_targets</th>\n",
|
||
" <th>all_targets</th>\n",
|
||
" <th>all_target_types</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>28</td>\n",
|
||
" <td>consentement optin jeune public, DDCP rentrée ...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>consentement optin jeune public, consentement ...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>traversee du port de commerce (gagnant et perd...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Arenametrix_bascule tel vers sib, consentement...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Arenametrix_bascule tel vers sib, consentement...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>151154</th>\n",
|
||
" <td>1256136</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>consentement optin b2c</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>151155</th>\n",
|
||
" <td>1256137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>consentement optin b2c</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>151156</th>\n",
|
||
" <td>1256138</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Inscrits NL jeune public site web, Inscrits NL...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>151157</th>\n",
|
||
" <td>1256139</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Inscrits NL jeune public site web, Inscrits NL...</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter, ma...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>151158</th>\n",
|
||
" <td>1256140</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>DRE MucemLab, consentement optin dre</td>\n",
|
||
" <td>manual_static_filter, manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>151159 rows × 4 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_targets \\\n",
|
||
"0 1 28 \n",
|
||
"1 2 7 \n",
|
||
"2 3 11 \n",
|
||
"3 4 6 \n",
|
||
"4 5 4 \n",
|
||
"... ... ... \n",
|
||
"151154 1256136 1 \n",
|
||
"151155 1256137 1 \n",
|
||
"151156 1256138 3 \n",
|
||
"151157 1256139 3 \n",
|
||
"151158 1256140 2 \n",
|
||
"\n",
|
||
" all_targets \\\n",
|
||
"0 consentement optin jeune public, DDCP rentrée ... \n",
|
||
"1 consentement optin jeune public, consentement ... \n",
|
||
"2 traversee du port de commerce (gagnant et perd... \n",
|
||
"3 Arenametrix_bascule tel vers sib, consentement... \n",
|
||
"4 Arenametrix_bascule tel vers sib, consentement... \n",
|
||
"... ... \n",
|
||
"151154 consentement optin b2c \n",
|
||
"151155 consentement optin b2c \n",
|
||
"151156 Inscrits NL jeune public site web, Inscrits NL... \n",
|
||
"151157 Inscrits NL jeune public site web, Inscrits NL... \n",
|
||
"151158 DRE MucemLab, consentement optin dre \n",
|
||
"\n",
|
||
" all_target_types \n",
|
||
"0 manual_static_filter, manual_static_filter, ma... \n",
|
||
"1 manual_static_filter, manual_static_filter, ma... \n",
|
||
"2 manual_static_filter, manual_static_filter, ma... \n",
|
||
"3 manual_static_filter, manual_static_filter, ma... \n",
|
||
"4 manual_static_filter, manual_static_filter, ma... \n",
|
||
"... ... \n",
|
||
"151154 manual_static_filter \n",
|
||
"151155 manual_static_filter \n",
|
||
"151156 manual_static_filter, manual_static_filter, ma... \n",
|
||
"151157 manual_static_filter, manual_static_filter, ma... \n",
|
||
"151158 manual_static_filter, manual_static_filter \n",
|
||
"\n",
|
||
"[151159 rows x 4 columns]"
|
||
]
|
||
},
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tenant_id = '1'\n",
|
||
"\n",
|
||
"def concatenate_names(names):\n",
|
||
" return ', '.join(names)\n",
|
||
" \n",
|
||
"target_example =display_databases(tenant_id, \"target_information\")\n",
|
||
"\n",
|
||
"target_example['target_name'] = target_example['target_name'].fillna('').str.lower()\n",
|
||
"\n",
|
||
"\n",
|
||
"target_example['jeune'] = target_example['target_name'].str.contains('|'.join(['jeune', 'pass_culture']), case=False).astype(int)\n",
|
||
"target_example['optin'] = target_example['target_name'].str.contains('|'.join(['optin' ,'opt-in']), case=False).astype(int)\n",
|
||
"target_example['optout'] = target_example['target_name'].str.contains('|'.join(['optout']), case=False).astype(int)\n",
|
||
"target_example['scolaire'] = target_example['target_name'].str.contains('|'.join(['scolaire' , 'enseignant', 'chercheur', 'schulen', 'école']), case=False).astype(int)\n",
|
||
"target_example['entreprise'] = target_example['target_name'].str.contains('|'.join(['b2b']), case=False).astype(int)\n",
|
||
"target_example['famille'] = target_example['target_name'].str.contains('|'.join(['famille', 'enfants']), case=False).astype(int)\n",
|
||
"target_example['newsletter'] = target_example['target_name'].str.contains('|'.join(['nl', 'newsletter']), case=False).astype(int)\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"target_agg = target_example.groupby('customer_id').agg(\n",
|
||
" nb_targets=('target_name', 'nunique'), # Utilisation de tuples pour spécifier les noms de colonnes\n",
|
||
" all_targets=('target_name', concatenate_names),\n",
|
||
" all_target_types=('target_type_name', concatenate_names)\n",
|
||
").reset_index()\n",
|
||
"target_agg"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "c75efea3-b5e8-4a7a-bed4-dd64ae9ff9f2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Export of dataset : Target_kpi_concatenate\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"export_in_temporary(target_agg, 'Target_kpi_concatenate')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "cb6f06e6-78de-4b8d-a103-8366eff0493a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"v"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c5e864b1-adad-4267-b956-3f7ef371d677",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"def display_covering_time(df, company, datecover):\n",
|
||
" \"\"\"\n",
|
||
" This function draws the time coverage of each company\n",
|
||
" \"\"\"\n",
|
||
" min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
|
||
" max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
|
||
" datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
|
||
" print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
|
||
" return datecover\n",
|
||
"\n",
|
||
"\n",
|
||
"def compute_time_intersection(datecover):\n",
|
||
" \"\"\"\n",
|
||
" This function returns the time coverage for all companies\n",
|
||
" \"\"\"\n",
|
||
" timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
|
||
" intersection = set.intersection(*timestamps_sets)\n",
|
||
" intersection_list = list(intersection)\n",
|
||
" formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
|
||
" return sorted(formated_dates)\n",
|
||
"\n",
|
||
"\n",
|
||
"def df_coverage_modelization(sport, coverage_features = 0.7):\n",
|
||
" \"\"\"\n",
|
||
" This function returns start_date, end_of_features and final dates\n",
|
||
" that help to construct train and test datasets\n",
|
||
" \"\"\"\n",
|
||
" datecover = {}\n",
|
||
" for company in sport:\n",
|
||
" df_products_purchased_reduced = display_input_databases(company, file_name = \"products_purchased_reduced\",\n",
|
||
" datetime_col = ['purchase_date'])\n",
|
||
" datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
|
||
" #print(datecover.keys())\n",
|
||
" dt_coverage = compute_time_intersection(datecover)\n",
|
||
" start_date = dt_coverage[0]\n",
|
||
" end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n",
|
||
" final_date = dt_coverage[-1]\n",
|
||
" return start_date, end_of_features, final_date\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"# Bases non communes : mise à plat"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"companies = {'musee' : ['1', '2', '3', '4'],\n",
|
||
" 'sport': ['5', '6', '7', '8', '9'],\n",
|
||
" 'musique' : ['10', '11', '12', '13', '14']}\n",
|
||
"\n",
|
||
"all_companies = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "35ac004f-c191-4f45-a4b1-6d993d9ec38c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"companies_databases = pd.DataFrame()\n",
|
||
"\n",
|
||
"for i in all_companies:\n",
|
||
" company_databases = pd.DataFrame({'company_number' : [i]})\n",
|
||
"\n",
|
||
" BUCKET = \"bdc2324-data/\"+i\n",
|
||
" for base in fs.ls(BUCKET):\n",
|
||
" match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
|
||
" if match:\n",
|
||
" nom_base = match.group(3)\n",
|
||
" company_databases[nom_base] = 1\n",
|
||
"\n",
|
||
" companies_databases = pd.concat([companies_databases, company_databases])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "8986e477-e6c5-4d6c-83b2-2c90c134b599",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>company_number</th>\n",
|
||
" <th>campaign_stats</th>\n",
|
||
" <th>campaigns</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>countries</th>\n",
|
||
" <th>currencies</th>\n",
|
||
" <th>customer_target_mappings</th>\n",
|
||
" <th>customersplus</th>\n",
|
||
" <th>event_types</th>\n",
|
||
" <th>events</th>\n",
|
||
" <th>facilities</th>\n",
|
||
" <th>link_stats</th>\n",
|
||
" <th>pricing_formulas</th>\n",
|
||
" <th>product_packs</th>\n",
|
||
" <th>products</th>\n",
|
||
" <th>products_groups</th>\n",
|
||
" <th>purchases</th>\n",
|
||
" <th>representation_category_capacities</th>\n",
|
||
" <th>representations</th>\n",
|
||
" <th>seasons</th>\n",
|
||
" <th>structure_tag_mappings</th>\n",
|
||
" <th>suppliers</th>\n",
|
||
" <th>tags</th>\n",
|
||
" <th>target_types</th>\n",
|
||
" <th>targets</th>\n",
|
||
" <th>tickets</th>\n",
|
||
" <th>type_of_categories</th>\n",
|
||
" <th>type_of_pricing_formulas</th>\n",
|
||
" <th>type_ofs</th>\n",
|
||
" <th>contribution_sites</th>\n",
|
||
" <th>contributions</th>\n",
|
||
" <th>consumptions</th>\n",
|
||
" <th>representation_types</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" company_number campaign_stats campaigns categories countries \\\n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 2 1 1 1 1 \n",
|
||
"0 3 1 1 1 1 \n",
|
||
"0 4 1 1 1 1 \n",
|
||
"0 5 1 1 1 1 \n",
|
||
"0 6 1 1 1 1 \n",
|
||
"0 7 1 1 1 1 \n",
|
||
"0 8 1 1 1 1 \n",
|
||
"0 9 1 1 1 1 \n",
|
||
"0 10 1 1 1 1 \n",
|
||
"0 11 1 1 1 1 \n",
|
||
"0 12 1 1 1 1 \n",
|
||
"0 13 1 1 1 1 \n",
|
||
"0 14 1 1 1 1 \n",
|
||
"\n",
|
||
" currencies customer_target_mappings customersplus event_types events \\\n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"\n",
|
||
" facilities link_stats pricing_formulas product_packs products \\\n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"0 1 1 1 1 1 \n",
|
||
"\n",
|
||
" products_groups purchases representation_category_capacities \\\n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"0 1 1 1 \n",
|
||
"\n",
|
||
" representations seasons structure_tag_mappings suppliers tags \\\n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 NaN 1 NaN \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 NaN 1 NaN \n",
|
||
"0 1 1 NaN 1 NaN \n",
|
||
"0 1 1 NaN 1 1.0 \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 NaN 1 NaN \n",
|
||
"0 1 1 1.0 1 1.0 \n",
|
||
"0 1 1 NaN 1 NaN \n",
|
||
"\n",
|
||
" target_types targets tickets type_of_categories \\\n",
|
||
"0 1 1 1 1.0 \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 1.0 \n",
|
||
"0 1 1 1 1.0 \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 NaN \n",
|
||
"0 1 1 1 1.0 \n",
|
||
"\n",
|
||
" type_of_pricing_formulas type_ofs contribution_sites contributions \\\n",
|
||
"0 1.0 1.0 NaN NaN \n",
|
||
"0 NaN NaN 1.0 1.0 \n",
|
||
"0 NaN NaN 1.0 1.0 \n",
|
||
"0 1.0 1.0 1.0 1.0 \n",
|
||
"0 NaN NaN NaN NaN \n",
|
||
"0 1.0 1.0 NaN NaN \n",
|
||
"0 1.0 1.0 NaN NaN \n",
|
||
"0 1.0 1.0 NaN NaN \n",
|
||
"0 NaN NaN NaN NaN \n",
|
||
"0 1.0 1.0 NaN NaN \n",
|
||
"0 NaN NaN NaN NaN \n",
|
||
"0 NaN 1.0 NaN NaN \n",
|
||
"0 NaN NaN NaN NaN \n",
|
||
"0 1.0 1.0 NaN NaN \n",
|
||
"\n",
|
||
" consumptions representation_types \n",
|
||
"0 NaN NaN \n",
|
||
"0 NaN NaN \n",
|
||
"0 1.0 NaN \n",
|
||
"0 NaN NaN \n",
|
||
"0 1.0 NaN \n",
|
||
"0 1.0 NaN \n",
|
||
"0 1.0 1.0 \n",
|
||
"0 NaN NaN \n",
|
||
"0 NaN NaN \n",
|
||
"0 NaN 1.0 \n",
|
||
"0 NaN NaN \n",
|
||
"0 1.0 NaN \n",
|
||
"0 NaN 1.0 \n",
|
||
"0 NaN 1.0 "
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.set_option(\"display.max_columns\", None)\n",
|
||
"companies_databases\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "8fecc3bb-4c03-4144-97c5-615224d9729e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd.reset_option(\"display.max_columns\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "0294ce71-840e-458b-8ffa-cadabbc6da21",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Début du travail 25/02"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## Bases communes aux types Musée"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "dbce1124-9a22-4502-a47a-fc3d0e2db70b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"companies['musee']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5080f66e-f779-410a-876d-b4fe2795e17e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# For every company id in companies['musique'], walk the entries returned by\n",
|
||
"# fs.ls(\"bdc2324-data/<id>\") and keep the table-name part of each matching CSV path.\n",
|
||
"# The names collected for one company are exposed as a global variable base_<id>.\n",
|
||
"# NOTE(review): the following cells read base_1 ... base_4, base_101 and base_10 ... base_14;\n",
|
||
"# confirm that companies['musique'] actually yields all of those ids on a fresh kernel.\n",
|
||
"for i in companies['musique']:\n",
|
||
"    BUCKET = \"bdc2324-data/\"+i\n",
|
||
"    liste_base = []\n",
|
||
"    for base in fs.ls(BUCKET):\n",
|
||
"        # Path must end with /<digits>/<digits><letters or underscores>.csv\n",
|
||
"        match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
|
||
"        if match:\n",
|
||
"            # group(3) is the letters/underscores run located just before \".csv\"\n",
|
||
"            nom_base = match.group(3)\n",
|
||
"            liste_base.append(nom_base)\n",
|
||
"    # Dynamic global: creates or overwrites the variable named base_<i>\n",
|
||
"    globals()['base_'+i] = liste_base\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "abd477e1-7479-4c88-a5aa-f987af3f5b79",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Table names present in all five lists: base_1, base_2, base_3, base_4 and base_101\n",
|
||
"intersection = set(base_1).intersection(base_2, base_3, base_4, base_101)\n",
|
||
"\n",
|
||
"# Convert the resulting set to a list\n",
|
||
"intersection_liste = list(intersection)\n",
|
||
"\n",
|
||
"print(intersection_liste)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Table names present in all five lists: base_10, base_11, base_12, base_13 and base_14\n",
|
||
"intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n",
|
||
"\n",
|
||
"# Convert the resulting set to a list (this overwrites the value from the previous cell)\n",
|
||
"intersection_liste = list(intersection)\n",
|
||
"\n",
|
||
"print(intersection_liste)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"len(intersection_liste)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7d058b21-a538-4f59-aefb-ef7966f73fdc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_tags = load_dataset_2(\"1\", \"tags\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "aa441f99-733c-4675-8676-bed4682d3324",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6767a750-14a4-4c05-903e-d2f07170825b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_customersplus = load_dataset_2(\"1\", \"customersplus\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "125e9145-a815-46fd-bdf4-07589508b259",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c17a6976-792f-474d-bcff-c89396eddb3f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ecfc155a-cb42-46ec-8da5-33fdcd087355",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"len(df1_structure_tag_mappings)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "071410b8-950d-4fcc-b2b9-57415253c286",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"?np.sort_values()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def info_colonnes_dataframe(df):\n",
|
||
" # Créer une liste pour stocker les informations sur chaque colonne\n",
|
||
" infos_colonnes = []\n",
|
||
"\n",
|
||
" # Parcourir les colonnes du DataFrame\n",
|
||
" for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n",
|
||
" # Calculer le taux de valeurs manquantes\n",
|
||
" taux_na = serie.isna().mean() * 100\n",
|
||
"\n",
|
||
" # Ajouter les informations à la liste\n",
|
||
" infos_colonnes.append({\n",
|
||
" 'Nom_colonne': nom_colonne,\n",
|
||
" 'Type_colonne': str(serie.dtype),\n",
|
||
" 'Taux_NA': taux_na\n",
|
||
" })\n",
|
||
"\n",
|
||
" # Créer une nouvelle DataFrame à partir de la liste d'informations\n",
|
||
" df_infos_colonnes = pd.DataFrame(infos_colonnes)\n",
|
||
"\n",
|
||
" return df_infos_colonnes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"info_colonnes_dataframe(df1_tags)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"info_colonnes_dataframe(df1_structure_tag_mappings)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd.set_option('display.max_colwidth', None)\n",
|
||
"\n",
|
||
"print(df1_tags['name'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a59bf932-5b54-4600-81f5-c55ac93ae510",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd.set_option('display.max_rows', None)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"print(df1_tags['name'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "76bffba1-5f7e-4308-9224-437ca66148f8",
|
||
"metadata": {},
|
||
"source": [
|
||
"## KPI sur target_type"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f6daf22e-6583-4431-a467-660a1dd4e5a4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d91d5895",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd.set_option('display.max_colwidth', None)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c58b17d3",
|
||
"metadata": {},
|
||
"source": [
|
||
"Raisonnement : on prend les target_type qui représentent 90 % des clients et on construit des catégories à partir de ceux-ci."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d74426b3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"targets = load_dataset_2(\"3\", \"targets\")\n",
|
||
"target_types = load_dataset_2(\"3\", \"target_types\")\n",
|
||
"\n",
|
||
"# target_all = pd.merge(targets, target_types, left_on= 'target_type_id', right_on= 'id' ,how = 'inner')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"id": "6930bff5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def print_main_target(tenant_id, nb_print = 40):\n",
|
||
" df_target = display_databases(tenant_id, \"target_information\")\n",
|
||
"\n",
|
||
" print('Nombre de ciblage : ', len(df_target))\n",
|
||
" nb_customers = df_target['customer_id'].nunique()\n",
|
||
" print('Nombre de client avec étiquette target : ', nb_customers) \n",
|
||
"\n",
|
||
" nb_custumers_per_target = df_target.groupby(\"target_name\")['customer_id'].count().reset_index().sort_values('customer_id', ascending=False)\n",
|
||
" nb_custumers_per_target['cumulative_customers'] = nb_custumers_per_target['customer_id'].cumsum()/len(df_target)\n",
|
||
" nb_custumers_per_target['customer_id'] = nb_custumers_per_target['customer_id']/nb_customers\n",
|
||
"\n",
|
||
" return nb_custumers_per_target.head(nb_print)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"id": "1e7ee1a0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n",
|
||
"Nombre de ciblage : 768024\n",
|
||
"Nombre de client avec étiquette target : 151159\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>cumulative_customers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>161</th>\n",
|
||
" <td>consentement optin mediation specialisee</td>\n",
|
||
" <td>0.992333</td>\n",
|
||
" <td>0.195306</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>consentement optin jeune public</td>\n",
|
||
" <td>0.992194</td>\n",
|
||
" <td>0.390585</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>158</th>\n",
|
||
" <td>consentement optin b2c</td>\n",
|
||
" <td>0.720493</td>\n",
|
||
" <td>0.532390</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>Arenametrix_bascule tel vers sib</td>\n",
|
||
" <td>0.232973</td>\n",
|
||
" <td>0.578242</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>165</th>\n",
|
||
" <td>consentement optout b2c</td>\n",
|
||
" <td>0.228389</td>\n",
|
||
" <td>0.623193</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>COM Inscrits NL générale (historique)</td>\n",
|
||
" <td>0.152191</td>\n",
|
||
" <td>0.653146</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>consentement optin newsletter generale</td>\n",
|
||
" <td>0.146171</td>\n",
|
||
" <td>0.681915</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>169</th>\n",
|
||
" <td>consentement optout newsletter generale</td>\n",
|
||
" <td>0.124736</td>\n",
|
||
" <td>0.706465</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>170</th>\n",
|
||
" <td>consentement optout scolaires</td>\n",
|
||
" <td>0.104155</td>\n",
|
||
" <td>0.726964</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>166</th>\n",
|
||
" <td>consentement optout dre</td>\n",
|
||
" <td>0.094788</td>\n",
|
||
" <td>0.745620</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>164</th>\n",
|
||
" <td>consentement optout b2b</td>\n",
|
||
" <td>0.094067</td>\n",
|
||
" <td>0.764134</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>126</th>\n",
|
||
" <td>Inscrits NL générale (export_291019 + operation_videomaton)</td>\n",
|
||
" <td>0.093187</td>\n",
|
||
" <td>0.782474</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>157</th>\n",
|
||
" <td>consentement optin b2b</td>\n",
|
||
" <td>0.084249</td>\n",
|
||
" <td>0.799056</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>216</th>\n",
|
||
" <td>ddcp_visiteurs dps 010622</td>\n",
|
||
" <td>0.081735</td>\n",
|
||
" <td>0.815142</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>Contacts_prenomsdoubles</td>\n",
|
||
" <td>0.077025</td>\n",
|
||
" <td>0.830302</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>115</th>\n",
|
||
" <td>FORMATION _ acheteurs optin last year</td>\n",
|
||
" <td>0.069364</td>\n",
|
||
" <td>0.843954</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>214</th>\n",
|
||
" <td>ddcp_promo_visiteurs occasionnels_musee_8mois</td>\n",
|
||
" <td>0.043927</td>\n",
|
||
" <td>0.852600</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>189</th>\n",
|
||
" <td>ddcp_promo_md_musée_dps 011019</td>\n",
|
||
" <td>0.039759</td>\n",
|
||
" <td>0.860425</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>188</th>\n",
|
||
" <td>ddcp_promo_MD_billet_musée_oct_2019_agarder2</td>\n",
|
||
" <td>0.036266</td>\n",
|
||
" <td>0.867563</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>163</th>\n",
|
||
" <td>consentement optin scolaires</td>\n",
|
||
" <td>0.032079</td>\n",
|
||
" <td>0.873876</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>159</th>\n",
|
||
" <td>consentement optin dre</td>\n",
|
||
" <td>0.029949</td>\n",
|
||
" <td>0.879771</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>34</th>\n",
|
||
" <td>DDCP Newsletter enseignants</td>\n",
|
||
" <td>0.029836</td>\n",
|
||
" <td>0.885643</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>36</th>\n",
|
||
" <td>DDCP Newsletter jeune public</td>\n",
|
||
" <td>0.025549</td>\n",
|
||
" <td>0.890671</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>127</th>\n",
|
||
" <td>Inscrits NL générale site web</td>\n",
|
||
" <td>0.024689</td>\n",
|
||
" <td>0.895531</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>145</th>\n",
|
||
" <td>Votre première liste</td>\n",
|
||
" <td>0.024577</td>\n",
|
||
" <td>0.900368</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>61</th>\n",
|
||
" <td>DDCP billets famille</td>\n",
|
||
" <td>0.023876</td>\n",
|
||
" <td>0.905067</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>106</th>\n",
|
||
" <td>DRE MucemLab</td>\n",
|
||
" <td>0.015229</td>\n",
|
||
" <td>0.908064</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>39</th>\n",
|
||
" <td>DDCP Newsletter relais champ social</td>\n",
|
||
" <td>0.015017</td>\n",
|
||
" <td>0.911020</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110</th>\n",
|
||
" <td>DRE institutionnels</td>\n",
|
||
" <td>0.014746</td>\n",
|
||
" <td>0.913922</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>48</th>\n",
|
||
" <td>DDCP PROMO Participants ateliers (adultes et enfants)</td>\n",
|
||
" <td>0.012927</td>\n",
|
||
" <td>0.916466</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>74</th>\n",
|
||
" <td>DDCP promo Plan B 2019 (concerts)</td>\n",
|
||
" <td>0.012887</td>\n",
|
||
" <td>0.919003</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>72</th>\n",
|
||
" <td>DDCP promo MD pass musées dps oct 2018</td>\n",
|
||
" <td>0.011809</td>\n",
|
||
" <td>0.921327</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>94</th>\n",
|
||
" <td>DDCP rentrée culturelle 2023</td>\n",
|
||
" <td>0.011624</td>\n",
|
||
" <td>0.923614</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>DDCP MD Procès du Siècle</td>\n",
|
||
" <td>0.011141</td>\n",
|
||
" <td>0.925807</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>186</th>\n",
|
||
" <td>ddcp_md_scene_ouverte_au_talent</td>\n",
|
||
" <td>0.010433</td>\n",
|
||
" <td>0.927860</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>108</th>\n",
|
||
" <td>DRE chercheurs</td>\n",
|
||
" <td>0.010300</td>\n",
|
||
" <td>0.929888</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>220</th>\n",
|
||
" <td>festival_jean_rouch</td>\n",
|
||
" <td>0.009937</td>\n",
|
||
" <td>0.931843</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>105</th>\n",
|
||
" <td>DRE Festival Jean Rouch</td>\n",
|
||
" <td>0.009937</td>\n",
|
||
" <td>0.933799</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>275</th>\n",
|
||
" <td>structures_etiquette champ social</td>\n",
|
||
" <td>0.009844</td>\n",
|
||
" <td>0.935736</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>86</th>\n",
|
||
" <td>DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)</td>\n",
|
||
" <td>0.008554</td>\n",
|
||
" <td>0.937420</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>128</th>\n",
|
||
" <td>Inscrits NL jeune public site web</td>\n",
|
||
" <td>0.008263</td>\n",
|
||
" <td>0.939046</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>260</th>\n",
|
||
" <td>rappel po barvalo</td>\n",
|
||
" <td>0.008256</td>\n",
|
||
" <td>0.940671</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>104</th>\n",
|
||
" <td>DDCP_marseille_jazz_2023</td>\n",
|
||
" <td>0.006900</td>\n",
|
||
" <td>0.942029</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>32</th>\n",
|
||
" <td>DDCP Newsletter centres de loisirs</td>\n",
|
||
" <td>0.006827</td>\n",
|
||
" <td>0.943373</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>Autres_interet_exposition</td>\n",
|
||
" <td>0.006754</td>\n",
|
||
" <td>0.944702</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>228</th>\n",
|
||
" <td>import_arenametrix_contactstousecardouv_expo</td>\n",
|
||
" <td>0.006212</td>\n",
|
||
" <td>0.945925</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>117</th>\n",
|
||
" <td>Formation clients fidèles</td>\n",
|
||
" <td>0.006047</td>\n",
|
||
" <td>0.947115</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>DDCP Cine 2023</td>\n",
|
||
" <td>0.005656</td>\n",
|
||
" <td>0.948228</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>40</th>\n",
|
||
" <td>DDCP OLBJ! 2023</td>\n",
|
||
" <td>0.005464</td>\n",
|
||
" <td>0.949304</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>240</th>\n",
|
||
" <td>journee-de-l-inclusion_20230601_21h25</td>\n",
|
||
" <td>0.005326</td>\n",
|
||
" <td>0.950352</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>137</th>\n",
|
||
" <td>Questionnaire 2 satisfaction scolaire</td>\n",
|
||
" <td>0.005259</td>\n",
|
||
" <td>0.951387</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>93</th>\n",
|
||
" <td>DDCP rendez-vous de septembre offre spéciale</td>\n",
|
||
" <td>0.005253</td>\n",
|
||
" <td>0.952421</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>135</th>\n",
|
||
" <td>Plan B 2018 (électro)</td>\n",
|
||
" <td>0.005081</td>\n",
|
||
" <td>0.953421</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>270</th>\n",
|
||
" <td>save_the_date_populaire</td>\n",
|
||
" <td>0.004948</td>\n",
|
||
" <td>0.954395</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>132</th>\n",
|
||
" <td>Newsletter CCR (passerelle)</td>\n",
|
||
" <td>0.004783</td>\n",
|
||
" <td>0.955336</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>116</th>\n",
|
||
" <td>Fichier institutionnel (ne pas utiliser sans autorisation)</td>\n",
|
||
" <td>0.004538</td>\n",
|
||
" <td>0.956229</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>222</th>\n",
|
||
" <td>fichier institutionnel_ne_pas_toucher</td>\n",
|
||
" <td>0.004532</td>\n",
|
||
" <td>0.957121</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>266</th>\n",
|
||
" <td>reservations_payees_pass_culture_190422_au_310123</td>\n",
|
||
" <td>0.004492</td>\n",
|
||
" <td>0.958005</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>102</th>\n",
|
||
" <td>DDCP spectateurs Marseille Jazz 18-19-21</td>\n",
|
||
" <td>0.004432</td>\n",
|
||
" <td>0.958878</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147</th>\n",
|
||
" <td>acid arab</td>\n",
|
||
" <td>0.004413</td>\n",
|
||
" <td>0.959746</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" target_name \\\n",
|
||
"161 consentement optin mediation specialisee \n",
|
||
"160 consentement optin jeune public \n",
|
||
"158 consentement optin b2c \n",
|
||
"5 Arenametrix_bascule tel vers sib \n",
|
||
"165 consentement optout b2c \n",
|
||
"19 COM Inscrits NL générale (historique) \n",
|
||
"162 consentement optin newsletter generale \n",
|
||
"169 consentement optout newsletter generale \n",
|
||
"170 consentement optout scolaires \n",
|
||
"166 consentement optout dre \n",
|
||
"164 consentement optout b2b \n",
|
||
"126 Inscrits NL générale (export_291019 + operation_videomaton) \n",
|
||
"157 consentement optin b2b \n",
|
||
"216 ddcp_visiteurs dps 010622 \n",
|
||
"20 Contacts_prenomsdoubles \n",
|
||
"115 FORMATION _ acheteurs optin last year \n",
|
||
"214 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
|
||
"189 ddcp_promo_md_musée_dps 011019 \n",
|
||
"188 ddcp_promo_MD_billet_musée_oct_2019_agarder2 \n",
|
||
"163 consentement optin scolaires \n",
|
||
"159 consentement optin dre \n",
|
||
"34 DDCP Newsletter enseignants \n",
|
||
"36 DDCP Newsletter jeune public \n",
|
||
"127 Inscrits NL générale site web \n",
|
||
"145 Votre première liste \n",
|
||
"61 DDCP billets famille \n",
|
||
"106 DRE MucemLab \n",
|
||
"39 DDCP Newsletter relais champ social \n",
|
||
"110 DRE institutionnels \n",
|
||
"48 DDCP PROMO Participants ateliers (adultes et enfants) \n",
|
||
"74 DDCP promo Plan B 2019 (concerts) \n",
|
||
"72 DDCP promo MD pass musées dps oct 2018 \n",
|
||
"94 DDCP rentrée culturelle 2023 \n",
|
||
"23 DDCP MD Procès du Siècle \n",
|
||
"186 ddcp_md_scene_ouverte_au_talent \n",
|
||
"108 DRE chercheurs \n",
|
||
"220 festival_jean_rouch \n",
|
||
"105 DRE Festival Jean Rouch \n",
|
||
"275 structures_etiquette champ social \n",
|
||
"86 DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers) \n",
|
||
"128 Inscrits NL jeune public site web \n",
|
||
"260 rappel po barvalo \n",
|
||
"104 DDCP_marseille_jazz_2023 \n",
|
||
"32 DDCP Newsletter centres de loisirs \n",
|
||
"13 Autres_interet_exposition \n",
|
||
"228 import_arenametrix_contactstousecardouv_expo \n",
|
||
"117 Formation clients fidèles \n",
|
||
"22 DDCP Cine 2023 \n",
|
||
"40 DDCP OLBJ! 2023 \n",
|
||
"240 journee-de-l-inclusion_20230601_21h25 \n",
|
||
"137 Questionnaire 2 satisfaction scolaire \n",
|
||
"93 DDCP rendez-vous de septembre offre spéciale \n",
|
||
"135 Plan B 2018 (électro) \n",
|
||
"270 save_the_date_populaire \n",
|
||
"132 Newsletter CCR (passerelle) \n",
|
||
"116 Fichier institutionnel (ne pas utiliser sans autorisation) \n",
|
||
"222 fichier institutionnel_ne_pas_toucher \n",
|
||
"266 reservations_payees_pass_culture_190422_au_310123 \n",
|
||
"102 DDCP spectateurs Marseille Jazz 18-19-21 \n",
|
||
"147 acid arab \n",
|
||
"\n",
|
||
" customer_id cumulative_customers \n",
|
||
"161 0.992333 0.195306 \n",
|
||
"160 0.992194 0.390585 \n",
|
||
"158 0.720493 0.532390 \n",
|
||
"5 0.232973 0.578242 \n",
|
||
"165 0.228389 0.623193 \n",
|
||
"19 0.152191 0.653146 \n",
|
||
"162 0.146171 0.681915 \n",
|
||
"169 0.124736 0.706465 \n",
|
||
"170 0.104155 0.726964 \n",
|
||
"166 0.094788 0.745620 \n",
|
||
"164 0.094067 0.764134 \n",
|
||
"126 0.093187 0.782474 \n",
|
||
"157 0.084249 0.799056 \n",
|
||
"216 0.081735 0.815142 \n",
|
||
"20 0.077025 0.830302 \n",
|
||
"115 0.069364 0.843954 \n",
|
||
"214 0.043927 0.852600 \n",
|
||
"189 0.039759 0.860425 \n",
|
||
"188 0.036266 0.867563 \n",
|
||
"163 0.032079 0.873876 \n",
|
||
"159 0.029949 0.879771 \n",
|
||
"34 0.029836 0.885643 \n",
|
||
"36 0.025549 0.890671 \n",
|
||
"127 0.024689 0.895531 \n",
|
||
"145 0.024577 0.900368 \n",
|
||
"61 0.023876 0.905067 \n",
|
||
"106 0.015229 0.908064 \n",
|
||
"39 0.015017 0.911020 \n",
|
||
"110 0.014746 0.913922 \n",
|
||
"48 0.012927 0.916466 \n",
|
||
"74 0.012887 0.919003 \n",
|
||
"72 0.011809 0.921327 \n",
|
||
"94 0.011624 0.923614 \n",
|
||
"23 0.011141 0.925807 \n",
|
||
"186 0.010433 0.927860 \n",
|
||
"108 0.010300 0.929888 \n",
|
||
"220 0.009937 0.931843 \n",
|
||
"105 0.009937 0.933799 \n",
|
||
"275 0.009844 0.935736 \n",
|
||
"86 0.008554 0.937420 \n",
|
||
"128 0.008263 0.939046 \n",
|
||
"260 0.008256 0.940671 \n",
|
||
"104 0.006900 0.942029 \n",
|
||
"32 0.006827 0.943373 \n",
|
||
"13 0.006754 0.944702 \n",
|
||
"228 0.006212 0.945925 \n",
|
||
"117 0.006047 0.947115 \n",
|
||
"22 0.005656 0.948228 \n",
|
||
"40 0.005464 0.949304 \n",
|
||
"240 0.005326 0.950352 \n",
|
||
"137 0.005259 0.951387 \n",
|
||
"93 0.005253 0.952421 \n",
|
||
"135 0.005081 0.953421 \n",
|
||
"270 0.004948 0.954395 \n",
|
||
"132 0.004783 0.955336 \n",
|
||
"116 0.004538 0.956229 \n",
|
||
"222 0.004532 0.957121 \n",
|
||
"266 0.004492 0.958005 \n",
|
||
"102 0.004432 0.958878 \n",
|
||
"147 0.004413 0.959746 "
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.set_option(\"max_colwidth\", None)\n",
|
||
"print_main_target('1', 60)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "b57a28ac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_2/target_information.csv\n",
|
||
"Nombre de ciblage : 260283\n",
|
||
"Nombre de client avec étiquette target : 233320\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>cumulative_customers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>Schokoladentour & Führungen Individuals</td>\n",
|
||
" <td>0.927906</td>\n",
|
||
" <td>0.831783</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Chocolateria Kurse 2023</td>\n",
|
||
" <td>0.073963</td>\n",
|
||
" <td>0.898084</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>mailxpert_contacts_2023-07-18_12-04-00 langue</td>\n",
|
||
" <td>0.025519</td>\n",
|
||
" <td>0.920959</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>mailxpert_contacts_2023-07-18_12-04-00</td>\n",
|
||
" <td>0.025519</td>\n",
|
||
" <td>0.943834</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>Newsletter opt-in Allgemein</td>\n",
|
||
" <td>0.022836</td>\n",
|
||
" <td>0.964304</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>Schokoladentour & Führungen Gruppen / Schulen</td>\n",
|
||
" <td>0.011555</td>\n",
|
||
" <td>0.974662</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>Newsletter DE</td>\n",
|
||
" <td>0.010749</td>\n",
|
||
" <td>0.984298</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>Newsletter FR</td>\n",
|
||
" <td>0.008520</td>\n",
|
||
" <td>0.991936</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>Newsletter EN</td>\n",
|
||
" <td>0.004286</td>\n",
|
||
" <td>0.995778</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Frauen in Zürich - Schulung</td>\n",
|
||
" <td>0.001003</td>\n",
|
||
" <td>0.996677</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>mailxpert_contacts_2023-07-18_13-25-45_inaktiv</td>\n",
|
||
" <td>0.000471</td>\n",
|
||
" <td>0.997099</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>Opt-in-Website DE</td>\n",
|
||
" <td>0.000030</td>\n",
|
||
" <td>0.997126</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>Opt-in Website EN</td>\n",
|
||
" <td>0.000009</td>\n",
|
||
" <td>0.997134</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>Opt-in Website FR</td>\n",
|
||
" <td>0.000004</td>\n",
|
||
" <td>0.997138</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>Votre première liste</td>\n",
|
||
" <td>0.000004</td>\n",
|
||
" <td>0.997142</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Activated contact EN</td>\n",
|
||
" <td>0.000004</td>\n",
|
||
" <td>0.997145</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Activated contact FR</td>\n",
|
||
" <td>0.000004</td>\n",
|
||
" <td>0.997149</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Activated contact DE</td>\n",
|
||
" <td>0.000004</td>\n",
|
||
" <td>0.997153</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" target_name customer_id \\\n",
|
||
"13 Schokoladentour & Führungen Individuals 0.927906 \n",
|
||
"3 Chocolateria Kurse 2023 0.073963 \n",
|
||
"16 mailxpert_contacts_2023-07-18_12-04-00 langue 0.025519 \n",
|
||
"15 mailxpert_contacts_2023-07-18_12-04-00 0.025519 \n",
|
||
"8 Newsletter opt-in Allgemein 0.022836 \n",
|
||
"12 Schokoladentour & Führungen Gruppen / Schulen 0.011555 \n",
|
||
"5 Newsletter DE 0.010749 \n",
|
||
"7 Newsletter FR 0.008520 \n",
|
||
"6 Newsletter EN 0.004286 \n",
|
||
"4 Frauen in Zürich - Schulung 0.001003 \n",
|
||
"17 mailxpert_contacts_2023-07-18_13-25-45_inaktiv 0.000471 \n",
|
||
"11 Opt-in-Website DE 0.000030 \n",
|
||
"9 Opt-in Website EN 0.000009 \n",
|
||
"10 Opt-in Website FR 0.000004 \n",
|
||
"14 Votre première liste 0.000004 \n",
|
||
"1 Activated contact EN 0.000004 \n",
|
||
"2 Activated contact FR 0.000004 \n",
|
||
"0 Activated contact DE 0.000004 \n",
|
||
"\n",
|
||
" cumulative_customers \n",
|
||
"13 0.831783 \n",
|
||
"3 0.898084 \n",
|
||
"16 0.920959 \n",
|
||
"15 0.943834 \n",
|
||
"8 0.964304 \n",
|
||
"12 0.974662 \n",
|
||
"5 0.984298 \n",
|
||
"7 0.991936 \n",
|
||
"6 0.995778 \n",
|
||
"4 0.996677 \n",
|
||
"17 0.997099 \n",
|
||
"11 0.997126 \n",
|
||
"9 0.997134 \n",
|
||
"10 0.997138 \n",
|
||
"14 0.997142 \n",
|
||
"1 0.997145 \n",
|
||
"2 0.997149 \n",
|
||
"0 0.997153 "
|
||
]
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"print_main_target('2', 25)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"id": "9a65991f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_3/target_information.csv\n",
|
||
"Nombre de ciblage : 1617362\n",
|
||
"Nombre de client avec étiquette target : 257018\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>cumulative_customers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>67</th>\n",
|
||
" <td>MKG_NLmensuelle_2021_OK</td>\n",
|
||
" <td>0.972348</td>\n",
|
||
" <td>0.154518</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>66</th>\n",
|
||
" <td>MKG_NLmensuelle_2021</td>\n",
|
||
" <td>0.956007</td>\n",
|
||
" <td>0.306439</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>119</th>\n",
|
||
" <td>consent_optin_nl</td>\n",
|
||
" <td>0.636648</td>\n",
|
||
" <td>0.407609</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>115</th>\n",
|
||
" <td>consent_optin_general</td>\n",
|
||
" <td>0.602506</td>\n",
|
||
" <td>0.503355</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>78</th>\n",
|
||
" <td>Mkg_NL_mensuelle3</td>\n",
|
||
" <td>0.404162</td>\n",
|
||
" <td>0.567581</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>125</th>\n",
|
||
" <td>consent_optout_general</td>\n",
|
||
" <td>0.368126</td>\n",
|
||
" <td>0.626081</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>104</th>\n",
|
||
" <td>TEST LOCBASE</td>\n",
|
||
" <td>0.350532</td>\n",
|
||
" <td>0.681784</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>68</th>\n",
|
||
" <td>MKG_Non_inscrit_liste_08-22</td>\n",
|
||
" <td>0.310605</td>\n",
|
||
" <td>0.731143</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>116</th>\n",
|
||
" <td>consent_optin_general_HISTORIQUE</td>\n",
|
||
" <td>0.301345</td>\n",
|
||
" <td>0.779030</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>80</th>\n",
|
||
" <td>Mkg_Zone_C</td>\n",
|
||
" <td>0.135298</td>\n",
|
||
" <td>0.800531</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>Acheteurs_100km_visite_depuismax5ans</td>\n",
|
||
" <td>0.091149</td>\n",
|
||
" <td>0.815015</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>54</th>\n",
|
||
" <td>Inscription NL ancien site web</td>\n",
|
||
" <td>0.083477</td>\n",
|
||
" <td>0.828281</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>112</th>\n",
|
||
" <td>consent_optin_equestre</td>\n",
|
||
" <td>0.083216</td>\n",
|
||
" <td>0.841505</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>79</th>\n",
|
||
" <td>Mkg_Zone_B</td>\n",
|
||
" <td>0.079889</td>\n",
|
||
" <td>0.854200</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>65</th>\n",
|
||
" <td>MKG_2022_ZoneB&ZoneC_Famille</td>\n",
|
||
" <td>0.072536</td>\n",
|
||
" <td>0.865727</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>111</th>\n",
|
||
" <td>consent_optin_b2b</td>\n",
|
||
" <td>0.064388</td>\n",
|
||
" <td>0.875959</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>102</th>\n",
|
||
" <td>Soft_Bounce_yahoo</td>\n",
|
||
" <td>0.064182</td>\n",
|
||
" <td>0.886158</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>100</th>\n",
|
||
" <td>Scénario Anniversaire</td>\n",
|
||
" <td>0.051249</td>\n",
|
||
" <td>0.894303</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>37</th>\n",
|
||
" <td>B2B_scolaire_et_centres_de_loisirs_2023</td>\n",
|
||
" <td>0.046732</td>\n",
|
||
" <td>0.901729</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>B2B_Sans étiquette</td>\n",
|
||
" <td>0.040472</td>\n",
|
||
" <td>0.908160</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>122</th>\n",
|
||
" <td>consent_optout_equestre</td>\n",
|
||
" <td>0.038865</td>\n",
|
||
" <td>0.914336</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>B2B_Inscrits newsletter Scolaires</td>\n",
|
||
" <td>0.038075</td>\n",
|
||
" <td>0.920387</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>28</th>\n",
|
||
" <td>B2B_historique_newsletter_SCOLAIRES</td>\n",
|
||
" <td>0.038040</td>\n",
|
||
" <td>0.926432</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>118</th>\n",
|
||
" <td>consent_optin_jdp</td>\n",
|
||
" <td>0.036110</td>\n",
|
||
" <td>0.932170</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>76</th>\n",
|
||
" <td>MKG_aire cantilienne</td>\n",
|
||
" <td>0.031908</td>\n",
|
||
" <td>0.937241</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>B2B_Sans étiquette FR+BE</td>\n",
|
||
" <td>0.029165</td>\n",
|
||
" <td>0.941876</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>108</th>\n",
|
||
" <td>b2b - écoles mai 2021</td>\n",
|
||
" <td>0.028574</td>\n",
|
||
" <td>0.946416</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>86</th>\n",
|
||
" <td>Ouvreur_NL_juin_2021</td>\n",
|
||
" <td>0.018193</td>\n",
|
||
" <td>0.949308</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>126</th>\n",
|
||
" <td>consent_optout_jdp</td>\n",
|
||
" <td>0.016816</td>\n",
|
||
" <td>0.951980</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>127</th>\n",
|
||
" <td>consent_optout_nl</td>\n",
|
||
" <td>0.016633</td>\n",
|
||
" <td>0.954623</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>B2B_CE_2023</td>\n",
|
||
" <td>0.016489</td>\n",
|
||
" <td>0.957243</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>106</th>\n",
|
||
" <td>Visiteurs Aout-Sept sans questionnaire</td>\n",
|
||
" <td>0.016275</td>\n",
|
||
" <td>0.959830</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>89</th>\n",
|
||
" <td>Pass Annuel en cours de validité</td>\n",
|
||
" <td>0.011540</td>\n",
|
||
" <td>0.961663</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>114</th>\n",
|
||
" <td>consent_optin_expositions</td>\n",
|
||
" <td>0.011388</td>\n",
|
||
" <td>0.963473</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>B2B_Sans étiquette hors FR+BE</td>\n",
|
||
" <td>0.011307</td>\n",
|
||
" <td>0.965270</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>113</th>\n",
|
||
" <td>consent_optin_evenements</td>\n",
|
||
" <td>0.011240</td>\n",
|
||
" <td>0.967056</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>32</th>\n",
|
||
" <td>B2B_liste_à_requalifier_CE</td>\n",
|
||
" <td>0.010742</td>\n",
|
||
" <td>0.968763</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110</th>\n",
|
||
" <td>consent_optin_abonnes_passannuels</td>\n",
|
||
" <td>0.009665</td>\n",
|
||
" <td>0.970299</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152</th>\n",
|
||
" <td>liste mécènes donateurs 01012023-31102023</td>\n",
|
||
" <td>0.008746</td>\n",
|
||
" <td>0.971689</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>34</th>\n",
|
||
" <td>B2B_liste_à_requalifier_SCOLAIRES</td>\n",
|
||
" <td>0.008688</td>\n",
|
||
" <td>0.973070</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>153</th>\n",
|
||
" <td>liste newsletter mécénat</td>\n",
|
||
" <td>0.008606</td>\n",
|
||
" <td>0.974437</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144</th>\n",
|
||
" <td>liste des donateurs iraiser don ponctuel 10122020-20012022</td>\n",
|
||
" <td>0.007591</td>\n",
|
||
" <td>0.975644</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>134</th>\n",
|
||
" <td>don ponctuel iRaiser 250122</td>\n",
|
||
" <td>0.007587</td>\n",
|
||
" <td>0.976849</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>77</th>\n",
|
||
" <td>MKG_visiteurs_juin_ES</td>\n",
|
||
" <td>0.007498</td>\n",
|
||
" <td>0.978041</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>72</th>\n",
|
||
" <td>MKG_acheteurs_domaine_noel_2021</td>\n",
|
||
" <td>0.006758</td>\n",
|
||
" <td>0.979115</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>Acheteurs Journées des Plantes oct. 2022</td>\n",
|
||
" <td>0.006038</td>\n",
|
||
" <td>0.980074</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96</th>\n",
|
||
" <td>Réponse au formulaire de satisfaction</td>\n",
|
||
" <td>0.005871</td>\n",
|
||
" <td>0.981007</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>B2B_Inscrits newsletter Collectivités et CSE</td>\n",
|
||
" <td>0.005786</td>\n",
|
||
" <td>0.981927</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>B2B_historique_newsletter_CE</td>\n",
|
||
" <td>0.005735</td>\n",
|
||
" <td>0.982838</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>70</th>\n",
|
||
" <td>MKG_acheteurs_1mois_pass_sanitaire</td>\n",
|
||
" <td>0.005731</td>\n",
|
||
" <td>0.983749</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>157</th>\n",
|
||
" <td>mec_expos_automne_2023</td>\n",
|
||
" <td>0.004972</td>\n",
|
||
" <td>0.984539</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abonnés Pass Annuel - dynamique</td>\n",
|
||
" <td>0.004856</td>\n",
|
||
" <td>0.985311</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>146</th>\n",
|
||
" <td>liste diffusion invitation Ingres</td>\n",
|
||
" <td>0.004564</td>\n",
|
||
" <td>0.986036</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>161</th>\n",
|
||
" <td>rattrpostvisite_nov21</td>\n",
|
||
" <td>0.004070</td>\n",
|
||
" <td>0.986683</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>166</th>\n",
|
||
" <td>réunion publique forêt 2023</td>\n",
|
||
" <td>0.003362</td>\n",
|
||
" <td>0.987217</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>94</th>\n",
|
||
" <td>Rattrapage_postvisite_novdec21v0</td>\n",
|
||
" <td>0.003358</td>\n",
|
||
" <td>0.987750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71</th>\n",
|
||
" <td>MKG_acheteurs_domaine_et_noel_2021</td>\n",
|
||
" <td>0.003054</td>\n",
|
||
" <td>0.988236</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>93</th>\n",
|
||
" <td>Rattrapage_postvisite_novdec21_VF</td>\n",
|
||
" <td>0.003015</td>\n",
|
||
" <td>0.988715</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>B2B_GUIDES_2023</td>\n",
|
||
" <td>0.002650</td>\n",
|
||
" <td>0.989136</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>B2B_TOANGLOPHONE_2023</td>\n",
|
||
" <td>0.002521</td>\n",
|
||
" <td>0.989537</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>B2B_liste_à_requalifier_GUIDES</td>\n",
|
||
" <td>0.002405</td>\n",
|
||
" <td>0.989919</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>64</th>\n",
|
||
" <td>MKG_2021_Acheteurs_JDP_Octobre_rattrap</td>\n",
|
||
" <td>0.002268</td>\n",
|
||
" <td>0.990279</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>159</th>\n",
|
||
" <td>rattrapage1211_logs-071021_121121</td>\n",
|
||
" <td>0.002120</td>\n",
|
||
" <td>0.990616</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>141</th>\n",
|
||
" <td>jdp_invités_2_entrées_ oct2023</td>\n",
|
||
" <td>0.002120</td>\n",
|
||
" <td>0.990953</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>109</th>\n",
|
||
" <td>château de chantilly questionnaire</td>\n",
|
||
" <td>0.002004</td>\n",
|
||
" <td>0.991272</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>150</th>\n",
|
||
" <td>liste invités avant-première jdp mai 2023</td>\n",
|
||
" <td>0.002000</td>\n",
|
||
" <td>0.991589</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>136</th>\n",
|
||
" <td>gece</td>\n",
|
||
" <td>0.001980</td>\n",
|
||
" <td>0.991904</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>B2B_Autocariste_2023</td>\n",
|
||
" <td>0.001692</td>\n",
|
||
" <td>0.992173</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>158</th>\n",
|
||
" <td>mkg_2021_acheteurs_jdp_octobre_rattr_exclure_new</td>\n",
|
||
" <td>0.001607</td>\n",
|
||
" <td>0.992428</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>43</th>\n",
|
||
" <td>Formation_journéedesplantes75</td>\n",
|
||
" <td>0.001541</td>\n",
|
||
" <td>0.992673</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" target_name customer_id \\\n",
|
||
"67 MKG_NLmensuelle_2021_OK 0.972348 \n",
|
||
"66 MKG_NLmensuelle_2021 0.956007 \n",
|
||
"119 consent_optin_nl 0.636648 \n",
|
||
"115 consent_optin_general 0.602506 \n",
|
||
"78 Mkg_NL_mensuelle3 0.404162 \n",
|
||
"125 consent_optout_general 0.368126 \n",
|
||
"104 TEST LOCBASE 0.350532 \n",
|
||
"68 MKG_Non_inscrit_liste_08-22 0.310605 \n",
|
||
"116 consent_optin_general_HISTORIQUE 0.301345 \n",
|
||
"80 Mkg_Zone_C 0.135298 \n",
|
||
"7 Acheteurs_100km_visite_depuismax5ans 0.091149 \n",
|
||
"54 Inscription NL ancien site web 0.083477 \n",
|
||
"112 consent_optin_equestre 0.083216 \n",
|
||
"79 Mkg_Zone_B 0.079889 \n",
|
||
"65 MKG_2022_ZoneB&ZoneC_Famille 0.072536 \n",
|
||
"111 consent_optin_b2b 0.064388 \n",
|
||
"102 Soft_Bounce_yahoo 0.064182 \n",
|
||
"100 Scénario Anniversaire 0.051249 \n",
|
||
"37 B2B_scolaire_et_centres_de_loisirs_2023 0.046732 \n",
|
||
"20 B2B_Sans étiquette 0.040472 \n",
|
||
"122 consent_optout_equestre 0.038865 \n",
|
||
"17 B2B_Inscrits newsletter Scolaires 0.038075 \n",
|
||
"28 B2B_historique_newsletter_SCOLAIRES 0.038040 \n",
|
||
"118 consent_optin_jdp 0.036110 \n",
|
||
"76 MKG_aire cantilienne 0.031908 \n",
|
||
"21 B2B_Sans étiquette FR+BE 0.029165 \n",
|
||
"108 b2b - écoles mai 2021 0.028574 \n",
|
||
"86 Ouvreur_NL_juin_2021 0.018193 \n",
|
||
"126 consent_optout_jdp 0.016816 \n",
|
||
"127 consent_optout_nl 0.016633 \n",
|
||
"13 B2B_CE_2023 0.016489 \n",
|
||
"106 Visiteurs Aout-Sept sans questionnaire 0.016275 \n",
|
||
"89 Pass Annuel en cours de validité 0.011540 \n",
|
||
"114 consent_optin_expositions 0.011388 \n",
|
||
"22 B2B_Sans étiquette hors FR+BE 0.011307 \n",
|
||
"113 consent_optin_evenements 0.011240 \n",
|
||
"32 B2B_liste_à_requalifier_CE 0.010742 \n",
|
||
"110 consent_optin_abonnes_passannuels 0.009665 \n",
|
||
"152 liste mécènes donateurs 01012023-31102023 0.008746 \n",
|
||
"34 B2B_liste_à_requalifier_SCOLAIRES 0.008688 \n",
|
||
"153 liste newsletter mécénat 0.008606 \n",
|
||
"144 liste des donateurs iraiser don ponctuel 10122020-20012022 0.007591 \n",
|
||
"134 don ponctuel iRaiser 250122 0.007587 \n",
|
||
"77 MKG_visiteurs_juin_ES 0.007498 \n",
|
||
"72 MKG_acheteurs_domaine_noel_2021 0.006758 \n",
|
||
"6 Acheteurs Journées des Plantes oct. 2022 0.006038 \n",
|
||
"96 Réponse au formulaire de satisfaction 0.005871 \n",
|
||
"15 B2B_Inscrits newsletter Collectivités et CSE 0.005786 \n",
|
||
"26 B2B_historique_newsletter_CE 0.005735 \n",
|
||
"70 MKG_acheteurs_1mois_pass_sanitaire 0.005731 \n",
|
||
"157 mec_expos_automne_2023 0.004972 \n",
|
||
"2 Abonnés Pass Annuel - dynamique 0.004856 \n",
|
||
"146 liste diffusion invitation Ingres 0.004564 \n",
|
||
"161 rattrpostvisite_nov21 0.004070 \n",
|
||
"166 réunion publique forêt 2023 0.003362 \n",
|
||
"94 Rattrapage_postvisite_novdec21v0 0.003358 \n",
|
||
"71 MKG_acheteurs_domaine_et_noel_2021 0.003054 \n",
|
||
"93 Rattrapage_postvisite_novdec21_VF 0.003015 \n",
|
||
"14 B2B_GUIDES_2023 0.002650 \n",
|
||
"23 B2B_TOANGLOPHONE_2023 0.002521 \n",
|
||
"33 B2B_liste_à_requalifier_GUIDES 0.002405 \n",
|
||
"64 MKG_2021_Acheteurs_JDP_Octobre_rattrap 0.002268 \n",
|
||
"159 rattrapage1211_logs-071021_121121 0.002120 \n",
|
||
"141 jdp_invités_2_entrées_ oct2023 0.002120 \n",
|
||
"109 château de chantilly questionnaire 0.002004 \n",
|
||
"150 liste invités avant-première jdp mai 2023 0.002000 \n",
|
||
"136 gece 0.001980 \n",
|
||
"12 B2B_Autocariste_2023 0.001692 \n",
|
||
"158 mkg_2021_acheteurs_jdp_octobre_rattr_exclure_new 0.001607 \n",
|
||
"43 Formation_journéedesplantes75 0.001541 \n",
|
||
"\n",
|
||
" cumulative_customers \n",
|
||
"67 0.154518 \n",
|
||
"66 0.306439 \n",
|
||
"119 0.407609 \n",
|
||
"115 0.503355 \n",
|
||
"78 0.567581 \n",
|
||
"125 0.626081 \n",
|
||
"104 0.681784 \n",
|
||
"68 0.731143 \n",
|
||
"116 0.779030 \n",
|
||
"80 0.800531 \n",
|
||
"7 0.815015 \n",
|
||
"54 0.828281 \n",
|
||
"112 0.841505 \n",
|
||
"79 0.854200 \n",
|
||
"65 0.865727 \n",
|
||
"111 0.875959 \n",
|
||
"102 0.886158 \n",
|
||
"100 0.894303 \n",
|
||
"37 0.901729 \n",
|
||
"20 0.908160 \n",
|
||
"122 0.914336 \n",
|
||
"17 0.920387 \n",
|
||
"28 0.926432 \n",
|
||
"118 0.932170 \n",
|
||
"76 0.937241 \n",
|
||
"21 0.941876 \n",
|
||
"108 0.946416 \n",
|
||
"86 0.949308 \n",
|
||
"126 0.951980 \n",
|
||
"127 0.954623 \n",
|
||
"13 0.957243 \n",
|
||
"106 0.959830 \n",
|
||
"89 0.961663 \n",
|
||
"114 0.963473 \n",
|
||
"22 0.965270 \n",
|
||
"113 0.967056 \n",
|
||
"32 0.968763 \n",
|
||
"110 0.970299 \n",
|
||
"152 0.971689 \n",
|
||
"34 0.973070 \n",
|
||
"153 0.974437 \n",
|
||
"144 0.975644 \n",
|
||
"134 0.976849 \n",
|
||
"77 0.978041 \n",
|
||
"72 0.979115 \n",
|
||
"6 0.980074 \n",
|
||
"96 0.981007 \n",
|
||
"15 0.981927 \n",
|
||
"26 0.982838 \n",
|
||
"70 0.983749 \n",
|
||
"157 0.984539 \n",
|
||
"2 0.985311 \n",
|
||
"146 0.986036 \n",
|
||
"161 0.986683 \n",
|
||
"166 0.987217 \n",
|
||
"94 0.987750 \n",
|
||
"71 0.988236 \n",
|
||
"93 0.988715 \n",
|
||
"14 0.989136 \n",
|
||
"23 0.989537 \n",
|
||
"33 0.989919 \n",
|
||
"64 0.990279 \n",
|
||
"159 0.990616 \n",
|
||
"141 0.990953 \n",
|
||
"109 0.991272 \n",
|
||
"150 0.991589 \n",
|
||
"136 0.991904 \n",
|
||
"12 0.992173 \n",
|
||
"158 0.992428 \n",
|
||
"43 0.992673 "
|
||
]
|
||
},
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"print_main_target('3', 70)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"id": "c66a4dc1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd.set_option('display.max_rows', None)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"id": "5f34b8bf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_4/target_information.csv\n",
|
||
"Nombre de ciblage : 4627640\n",
|
||
"Nombre de client avec étiquette target : 320813\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>cumulative_customers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>232</th>\n",
|
||
" <td>Tous les contacts mis à jour</td>\n",
|
||
" <td>0.999991</td>\n",
|
||
" <td>0.069325</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>76</th>\n",
|
||
" <td>Base données</td>\n",
|
||
" <td>0.999991</td>\n",
|
||
" <td>0.138650</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>191</th>\n",
|
||
" <td>Office de Tourisme</td>\n",
|
||
" <td>0.999991</td>\n",
|
||
" <td>0.207974</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>128</th>\n",
|
||
" <td>Globale sans VIP</td>\n",
|
||
" <td>0.955488</td>\n",
|
||
" <td>0.274214</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>112</th>\n",
|
||
" <td>Contacts structures</td>\n",
|
||
" <td>0.929969</td>\n",
|
||
" <td>0.338684</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98</th>\n",
|
||
" <td>Cible gratuité IMA COMEDY</td>\n",
|
||
" <td>0.636246</td>\n",
|
||
" <td>0.382792</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>2 IEME ENVOI IMA COMEDY CLUB</td>\n",
|
||
" <td>0.630389</td>\n",
|
||
" <td>0.426494</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>64</th>\n",
|
||
" <td>Actions Marketing ARABOFOLIES</td>\n",
|
||
" <td>0.627917</td>\n",
|
||
" <td>0.470025</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>171</th>\n",
|
||
" <td>Liste globale sans VIP</td>\n",
|
||
" <td>0.582183</td>\n",
|
||
" <td>0.510385</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>126</th>\n",
|
||
" <td>Formulaire inscription mallette \"Cultures en partage\"</td>\n",
|
||
" <td>0.532831</td>\n",
|
||
" <td>0.547324</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>78</th>\n",
|
||
" <td>CAMPAGNE ADHESION 2023</td>\n",
|
||
" <td>0.449371</td>\n",
|
||
" <td>0.578477</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>234</th>\n",
|
||
" <td>Tous les optins</td>\n",
|
||
" <td>0.412546</td>\n",
|
||
" <td>0.607076</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>192</th>\n",
|
||
" <td>Optin 2023</td>\n",
|
||
" <td>0.365057</td>\n",
|
||
" <td>0.632384</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>170</th>\n",
|
||
" <td>Liste globale optin-15-01-2021</td>\n",
|
||
" <td>0.325482</td>\n",
|
||
" <td>0.654948</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>10-03-sb-dolist</td>\n",
|
||
" <td>0.193833</td>\n",
|
||
" <td>0.668386</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>195</th>\n",
|
||
" <td>Origine - Nouba</td>\n",
|
||
" <td>0.192452</td>\n",
|
||
" <td>0.681728</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>158</th>\n",
|
||
" <td>LIVE2022_Intérêt Expositions musée</td>\n",
|
||
" <td>0.173550</td>\n",
|
||
" <td>0.693759</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>414</th>\n",
|
||
" <td>old_Intéressés par la Musique</td>\n",
|
||
" <td>0.166505</td>\n",
|
||
" <td>0.705302</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>415</th>\n",
|
||
" <td>old_Intérêt Danse</td>\n",
|
||
" <td>0.163572</td>\n",
|
||
" <td>0.716642</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>100</th>\n",
|
||
" <td>Cible offre DAOUD DEPARDON</td>\n",
|
||
" <td>0.130372</td>\n",
|
||
" <td>0.725680</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>13-04-2022-vente 2021</td>\n",
|
||
" <td>0.128804</td>\n",
|
||
" <td>0.734609</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>73</th>\n",
|
||
" <td>Arabofolies Juillet 2022</td>\n",
|
||
" <td>0.109123</td>\n",
|
||
" <td>0.742174</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>137</th>\n",
|
||
" <td>Intérêt LGBTQ+</td>\n",
|
||
" <td>0.108917</td>\n",
|
||
" <td>0.749725</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>274</th>\n",
|
||
" <td>blacklistés ima</td>\n",
|
||
" <td>0.105407</td>\n",
|
||
" <td>0.757032</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>208</th>\n",
|
||
" <td>Public traditionnel</td>\n",
|
||
" <td>0.083821</td>\n",
|
||
" <td>0.762843</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>159</th>\n",
|
||
" <td>LIVE2022_Intérêt Humour</td>\n",
|
||
" <td>0.082858</td>\n",
|
||
" <td>0.768587</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>99</th>\n",
|
||
" <td>Cible jeunes humour</td>\n",
|
||
" <td>0.080312</td>\n",
|
||
" <td>0.774155</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>213</th>\n",
|
||
" <td>Relance gratuité IMA COMEDY CLUB</td>\n",
|
||
" <td>0.079205</td>\n",
|
||
" <td>0.779646</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>101</th>\n",
|
||
" <td>Cible rencontres et débats</td>\n",
|
||
" <td>0.076743</td>\n",
|
||
" <td>0.784966</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>189</th>\n",
|
||
" <td>Nouveaux inscrits newsletter</td>\n",
|
||
" <td>0.076200</td>\n",
|
||
" <td>0.790249</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>349</th>\n",
|
||
" <td>interet nuit du cinéma</td>\n",
|
||
" <td>0.072260</td>\n",
|
||
" <td>0.795258</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>141</th>\n",
|
||
" <td>Intérêt prononcé pour la nuit du ramadan</td>\n",
|
||
" <td>0.072254</td>\n",
|
||
" <td>0.800267</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>87</th>\n",
|
||
" <td>Cible Algérie</td>\n",
|
||
" <td>0.070337</td>\n",
|
||
" <td>0.805143</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>93</th>\n",
|
||
" <td>Cible News offre spéciale humour</td>\n",
|
||
" <td>0.069296</td>\n",
|
||
" <td>0.809947</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140</th>\n",
|
||
" <td>Intérêt musique électro</td>\n",
|
||
" <td>0.068716</td>\n",
|
||
" <td>0.814711</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>280</th>\n",
|
||
" <td>cible Histoire et feminisme</td>\n",
|
||
" <td>0.068585</td>\n",
|
||
" <td>0.819466</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>174</th>\n",
|
||
" <td>Liste relais pour présentation 2023</td>\n",
|
||
" <td>0.067404</td>\n",
|
||
" <td>0.824139</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>138</th>\n",
|
||
" <td>Intérêt musique Orientale</td>\n",
|
||
" <td>0.066082</td>\n",
|
||
" <td>0.828720</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>16-07-21-nuit-du-cinema</td>\n",
|
||
" <td>0.065166</td>\n",
|
||
" <td>0.833237</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>204</th>\n",
|
||
" <td>Profil Client Expos Divas (Geo)</td>\n",
|
||
" <td>0.063401</td>\n",
|
||
" <td>0.837633</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>265</th>\n",
|
||
" <td>araborolies/Divas/relance</td>\n",
|
||
" <td>0.061647</td>\n",
|
||
" <td>0.841906</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>203</th>\n",
|
||
" <td>Profil Client Expo Divas</td>\n",
|
||
" <td>0.061138</td>\n",
|
||
" <td>0.846145</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>233</th>\n",
|
||
" <td>Tous les inscrits aux newsletters via le formulaire du site web</td>\n",
|
||
" <td>0.057407</td>\n",
|
||
" <td>0.850125</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>243</th>\n",
|
||
" <td>VIP Générale</td>\n",
|
||
" <td>0.053682</td>\n",
|
||
" <td>0.853846</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>226</th>\n",
|
||
" <td>Strcutures sans VIP</td>\n",
|
||
" <td>0.053396</td>\n",
|
||
" <td>0.857548</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>67</th>\n",
|
||
" <td>Agi pour buren</td>\n",
|
||
" <td>0.051575</td>\n",
|
||
" <td>0.861123</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144</th>\n",
|
||
" <td>Invitation à l'exposition Palestine LANG</td>\n",
|
||
" <td>0.051092</td>\n",
|
||
" <td>0.864665</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>62</th>\n",
|
||
" <td>Acheteurs individuels de l'expo Juifs d'orient statique</td>\n",
|
||
" <td>0.046526</td>\n",
|
||
" <td>0.867891</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>61</th>\n",
|
||
" <td>Acheteurs individuels de l'expo Juifs d'orient</td>\n",
|
||
" <td>0.046513</td>\n",
|
||
" <td>0.871115</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95</th>\n",
|
||
" <td>Cible arabic Sound system</td>\n",
|
||
" <td>0.046164</td>\n",
|
||
" <td>0.874316</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>244</th>\n",
|
||
" <td>VIP STATIQUE</td>\n",
|
||
" <td>0.041158</td>\n",
|
||
" <td>0.877169</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>245</th>\n",
|
||
" <td>VIP Téléchargement</td>\n",
|
||
" <td>0.040737</td>\n",
|
||
" <td>0.879993</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>102</th>\n",
|
||
" <td>Cible scolaire 2022</td>\n",
|
||
" <td>0.040313</td>\n",
|
||
" <td>0.882788</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>90</th>\n",
|
||
" <td>Cible Maroc</td>\n",
|
||
" <td>0.039827</td>\n",
|
||
" <td>0.885549</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>91</th>\n",
|
||
" <td>Cible Maroc</td>\n",
|
||
" <td>0.039827</td>\n",
|
||
" <td>0.888310</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>41</th>\n",
|
||
" <td>26mai-2023-Structures-invit-palestine</td>\n",
|
||
" <td>0.039188</td>\n",
|
||
" <td>0.891027</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>393</th>\n",
|
||
" <td>liste_contacts_agi_2021_02_16_</td>\n",
|
||
" <td>0.033618</td>\n",
|
||
" <td>0.893357</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>450</th>\n",
|
||
" <td>sb-fichier-eudonet-ok-18-05-21</td>\n",
|
||
" <td>0.032056</td>\n",
|
||
" <td>0.895579</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>04_11_22_eudonet</td>\n",
|
||
" <td>0.031857</td>\n",
|
||
" <td>0.897788</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>215</th>\n",
|
||
" <td>SB-18-05-VIP-eudonet</td>\n",
|
||
" <td>0.031857</td>\n",
|
||
" <td>0.899996</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>175</th>\n",
|
||
" <td>Liste vernissage</td>\n",
|
||
" <td>0.031364</td>\n",
|
||
" <td>0.902171</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>235</th>\n",
|
||
" <td>Tous les relais</td>\n",
|
||
" <td>0.031090</td>\n",
|
||
" <td>0.904326</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>252</th>\n",
|
||
" <td>Visiteurs expo pour questionnaires</td>\n",
|
||
" <td>0.029930</td>\n",
|
||
" <td>0.906401</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>223</th>\n",
|
||
" <td>Scolaires - Actions Educatives 24/11/2021</td>\n",
|
||
" <td>0.029871</td>\n",
|
||
" <td>0.908472</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>92</th>\n",
|
||
" <td>Cible Musique Judeo-arabe</td>\n",
|
||
" <td>0.029266</td>\n",
|
||
" <td>0.910501</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>15-09-2023-Cible-Palestine</td>\n",
|
||
" <td>0.028531</td>\n",
|
||
" <td>0.912478</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>LIVE2022_Intérêts Rencontres, débats et conférences</td>\n",
|
||
" <td>0.026928</td>\n",
|
||
" <td>0.914345</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>282</th>\n",
|
||
" <td>cible photo</td>\n",
|
||
" <td>0.026056</td>\n",
|
||
" <td>0.916152</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>38</th>\n",
|
||
" <td>26-MAI_STRUCTURE-2023-OK</td>\n",
|
||
" <td>0.025495</td>\n",
|
||
" <td>0.917919</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>07-12-20-Relais-invitatation-divas</td>\n",
|
||
" <td>0.024909</td>\n",
|
||
" <td>0.919646</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>410</th>\n",
|
||
" <td>old_Amis de l'IMA</td>\n",
|
||
" <td>0.023160</td>\n",
|
||
" <td>0.921251</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>222</th>\n",
|
||
" <td>Scolaires - Actions Educatives 24/01/2023</td>\n",
|
||
" <td>0.022724</td>\n",
|
||
" <td>0.922827</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>198</th>\n",
|
||
" <td>PALESTINE</td>\n",
|
||
" <td>0.020903</td>\n",
|
||
" <td>0.924276</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>249</th>\n",
|
||
" <td>Vignes et tilleuls</td>\n",
|
||
" <td>0.020439</td>\n",
|
||
" <td>0.925693</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>39</th>\n",
|
||
" <td>26-mai-11H10-relais</td>\n",
|
||
" <td>0.019578</td>\n",
|
||
" <td>0.927050</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110</th>\n",
|
||
" <td>Contacts Librairie</td>\n",
|
||
" <td>0.019114</td>\n",
|
||
" <td>0.928375</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>194</th>\n",
|
||
" <td>Origine - Inscription manuelle</td>\n",
|
||
" <td>0.018307</td>\n",
|
||
" <td>0.929644</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>196</th>\n",
|
||
" <td>Origine - QR code</td>\n",
|
||
" <td>0.018294</td>\n",
|
||
" <td>0.930913</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>59</th>\n",
|
||
" <td>Acheteurs Daoud Depardon</td>\n",
|
||
" <td>0.018232</td>\n",
|
||
" <td>0.932176</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>473</th>\n",
|
||
" <td>événements autour de Habibi</td>\n",
|
||
" <td>0.017755</td>\n",
|
||
" <td>0.933407</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>17-04-21-nuits-ducinema</td>\n",
|
||
" <td>0.017484</td>\n",
|
||
" <td>0.934619</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>07-12-20-liste-invites-presentation-divas</td>\n",
|
||
" <td>0.017337</td>\n",
|
||
" <td>0.935821</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>278</th>\n",
|
||
" <td>catégorie Cinéma</td>\n",
|
||
" <td>0.016748</td>\n",
|
||
" <td>0.936982</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>250</th>\n",
|
||
" <td>Visiteurs Palestine</td>\n",
|
||
" <td>0.016302</td>\n",
|
||
" <td>0.938113</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>248</th>\n",
|
||
" <td>Vernissages Algérie</td>\n",
|
||
" <td>0.014703</td>\n",
|
||
" <td>0.939132</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>242</th>\n",
|
||
" <td>VIP Algérie</td>\n",
|
||
" <td>0.014666</td>\n",
|
||
" <td>0.940149</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>445</th>\n",
|
||
" <td>save the date invités vernissage</td>\n",
|
||
" <td>0.014432</td>\n",
|
||
" <td>0.941149</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>205</th>\n",
|
||
" <td>Profil Contact Expo Divas - Juillet</td>\n",
|
||
" <td>0.014282</td>\n",
|
||
" <td>0.942139</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>17-04-2021-autres-liste-statique-cinema</td>\n",
|
||
" <td>0.013862</td>\n",
|
||
" <td>0.943100</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>279</th>\n",
|
||
" <td>catégorie rencontres et débats</td>\n",
|
||
" <td>0.013285</td>\n",
|
||
" <td>0.944021</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>325</th>\n",
|
||
" <td>fichier-dolist-05-12-20-relais</td>\n",
|
||
" <td>0.013163</td>\n",
|
||
" <td>0.944934</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>177</th>\n",
|
||
" <td>MAILING SAVE THE DATE PARFUMS</td>\n",
|
||
" <td>0.013154</td>\n",
|
||
" <td>0.945846</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96</th>\n",
|
||
" <td>Cible enfants/ famille janvier 23</td>\n",
|
||
" <td>0.012515</td>\n",
|
||
" <td>0.946713</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>129</th>\n",
|
||
" <td>Globale vernissage Samarcande</td>\n",
|
||
" <td>0.011078</td>\n",
|
||
" <td>0.947481</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>251</th>\n",
|
||
" <td>Visiteurs Palestine mi-expo questionnaire</td>\n",
|
||
" <td>0.010735</td>\n",
|
||
" <td>0.948225</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>88</th>\n",
|
||
" <td>Cible LIBAN</td>\n",
|
||
" <td>0.010003</td>\n",
|
||
" <td>0.948919</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>18-11-2021-liste i,augurationdu23-18h3à</td>\n",
|
||
" <td>0.009975</td>\n",
|
||
" <td>0.949610</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>431</th>\n",
|
||
" <td>professionnels de l'écologie</td>\n",
|
||
" <td>0.009769</td>\n",
|
||
" <td>0.950288</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>142</th>\n",
|
||
" <td>Intérêt écologie (pro)</td>\n",
|
||
" <td>0.009769</td>\n",
|
||
" <td>0.950965</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>323</th>\n",
|
||
" <td>fichier pro écologie</td>\n",
|
||
" <td>0.009769</td>\n",
|
||
" <td>0.951642</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" target_name \\\n",
|
||
"232 Tous les contacts mis à jour \n",
|
||
"76 Base données \n",
|
||
"191 Office de Tourisme \n",
|
||
"128 Globale sans VIP \n",
|
||
"112 Contacts structures \n",
|
||
"98 Cible gratuité IMA COMEDY \n",
|
||
"23 2 IEME ENVOI IMA COMEDY CLUB \n",
|
||
"64 Actions Marketing ARABOFOLIES \n",
|
||
"171 Liste globale sans VIP \n",
|
||
"126 Formulaire inscription mallette \"Cultures en partage\" \n",
|
||
"78 CAMPAGNE ADHESION 2023 \n",
|
||
"234 Tous les optins \n",
|
||
"192 Optin 2023 \n",
|
||
"170 Liste globale optin-15-01-2021 \n",
|
||
"10 10-03-sb-dolist \n",
|
||
"195 Origine - Nouba \n",
|
||
"158 LIVE2022_Intérêt Expositions musée \n",
|
||
"414 old_Intéressés par la Musique \n",
|
||
"415 old_Intérêt Danse \n",
|
||
"100 Cible offre DAOUD DEPARDON \n",
|
||
"12 13-04-2022-vente 2021 \n",
|
||
"73 Arabofolies Juillet 2022 \n",
|
||
"137 Intérêt LGBTQ+ \n",
|
||
"274 blacklistés ima \n",
|
||
"208 Public traditionnel \n",
|
||
"159 LIVE2022_Intérêt Humour \n",
|
||
"99 Cible jeunes humour \n",
|
||
"213 Relance gratuité IMA COMEDY CLUB \n",
|
||
"101 Cible rencontres et débats \n",
|
||
"189 Nouveaux inscrits newsletter \n",
|
||
"349 interet nuit du cinéma \n",
|
||
"141 Intérêt prononcé pour la nuit du ramadan \n",
|
||
"87 Cible Algérie \n",
|
||
"93 Cible News offre spéciale humour \n",
|
||
"140 Intérêt musique électro \n",
|
||
"280 cible Histoire et feminisme \n",
|
||
"174 Liste relais pour présentation 2023 \n",
|
||
"138 Intérêt musique Orientale \n",
|
||
"18 16-07-21-nuit-du-cinema \n",
|
||
"204 Profil Client Expos Divas (Geo) \n",
|
||
"265 araborolies/Divas/relance \n",
|
||
"203 Profil Client Expo Divas \n",
|
||
"233 Tous les inscrits aux newsletters via le formulaire du site web \n",
|
||
"243 VIP Générale \n",
|
||
"226 Strcutures sans VIP \n",
|
||
"67 Agi pour buren \n",
|
||
"144 Invitation à l'exposition Palestine LANG \n",
|
||
"62 Acheteurs individuels de l'expo Juifs d'orient statique \n",
|
||
"61 Acheteurs individuels de l'expo Juifs d'orient \n",
|
||
"95 Cible arabic Sound system \n",
|
||
"244 VIP STATIQUE \n",
|
||
"245 VIP Téléchargement \n",
|
||
"102 Cible scolaire 2022 \n",
|
||
"90 Cible Maroc \n",
|
||
"91 Cible Maroc \n",
|
||
"41 26mai-2023-Structures-invit-palestine \n",
|
||
"393 liste_contacts_agi_2021_02_16_ \n",
|
||
"450 sb-fichier-eudonet-ok-18-05-21 \n",
|
||
"4 04_11_22_eudonet \n",
|
||
"215 SB-18-05-VIP-eudonet \n",
|
||
"175 Liste vernissage \n",
|
||
"235 Tous les relais \n",
|
||
"252 Visiteurs expo pour questionnaires \n",
|
||
"223 Scolaires - Actions Educatives 24/11/2021 \n",
|
||
"92 Cible Musique Judeo-arabe \n",
|
||
"13 15-09-2023-Cible-Palestine \n",
|
||
"162 LIVE2022_Intérêts Rencontres, débats et conférences \n",
|
||
"282 cible photo \n",
|
||
"38 26-MAI_STRUCTURE-2023-OK \n",
|
||
"5 07-12-20-Relais-invitatation-divas \n",
|
||
"410 old_Amis de l'IMA \n",
|
||
"222 Scolaires - Actions Educatives 24/01/2023 \n",
|
||
"198 PALESTINE \n",
|
||
"249 Vignes et tilleuls \n",
|
||
"39 26-mai-11H10-relais \n",
|
||
"110 Contacts Librairie \n",
|
||
"194 Origine - Inscription manuelle \n",
|
||
"196 Origine - QR code \n",
|
||
"59 Acheteurs Daoud Depardon \n",
|
||
"473 événements autour de Habibi \n",
|
||
"20 17-04-21-nuits-ducinema \n",
|
||
"6 07-12-20-liste-invites-presentation-divas \n",
|
||
"278 catégorie Cinéma \n",
|
||
"250 Visiteurs Palestine \n",
|
||
"248 Vernissages Algérie \n",
|
||
"242 VIP Algérie \n",
|
||
"445 save the date invités vernissage \n",
|
||
"205 Profil Contact Expo Divas - Juillet \n",
|
||
"19 17-04-2021-autres-liste-statique-cinema \n",
|
||
"279 catégorie rencontres et débats \n",
|
||
"325 fichier-dolist-05-12-20-relais \n",
|
||
"177 MAILING SAVE THE DATE PARFUMS \n",
|
||
"96 Cible enfants/ famille janvier 23 \n",
|
||
"129 Globale vernissage Samarcande \n",
|
||
"251 Visiteurs Palestine mi-expo questionnaire \n",
|
||
"88 Cible LIBAN \n",
|
||
"21 18-11-2021-liste i,augurationdu23-18h3à \n",
|
||
"431 professionnels de l'écologie \n",
|
||
"142 Intérêt écologie (pro) \n",
|
||
"323 fichier pro écologie \n",
|
||
"\n",
|
||
" customer_id cumulative_customers \n",
|
||
"232 0.999991 0.069325 \n",
|
||
"76 0.999991 0.138650 \n",
|
||
"191 0.999991 0.207974 \n",
|
||
"128 0.955488 0.274214 \n",
|
||
"112 0.929969 0.338684 \n",
|
||
"98 0.636246 0.382792 \n",
|
||
"23 0.630389 0.426494 \n",
|
||
"64 0.627917 0.470025 \n",
|
||
"171 0.582183 0.510385 \n",
|
||
"126 0.532831 0.547324 \n",
|
||
"78 0.449371 0.578477 \n",
|
||
"234 0.412546 0.607076 \n",
|
||
"192 0.365057 0.632384 \n",
|
||
"170 0.325482 0.654948 \n",
|
||
"10 0.193833 0.668386 \n",
|
||
"195 0.192452 0.681728 \n",
|
||
"158 0.173550 0.693759 \n",
|
||
"414 0.166505 0.705302 \n",
|
||
"415 0.163572 0.716642 \n",
|
||
"100 0.130372 0.725680 \n",
|
||
"12 0.128804 0.734609 \n",
|
||
"73 0.109123 0.742174 \n",
|
||
"137 0.108917 0.749725 \n",
|
||
"274 0.105407 0.757032 \n",
|
||
"208 0.083821 0.762843 \n",
|
||
"159 0.082858 0.768587 \n",
|
||
"99 0.080312 0.774155 \n",
|
||
"213 0.079205 0.779646 \n",
|
||
"101 0.076743 0.784966 \n",
|
||
"189 0.076200 0.790249 \n",
|
||
"349 0.072260 0.795258 \n",
|
||
"141 0.072254 0.800267 \n",
|
||
"87 0.070337 0.805143 \n",
|
||
"93 0.069296 0.809947 \n",
|
||
"140 0.068716 0.814711 \n",
|
||
"280 0.068585 0.819466 \n",
|
||
"174 0.067404 0.824139 \n",
|
||
"138 0.066082 0.828720 \n",
|
||
"18 0.065166 0.833237 \n",
|
||
"204 0.063401 0.837633 \n",
|
||
"265 0.061647 0.841906 \n",
|
||
"203 0.061138 0.846145 \n",
|
||
"233 0.057407 0.850125 \n",
|
||
"243 0.053682 0.853846 \n",
|
||
"226 0.053396 0.857548 \n",
|
||
"67 0.051575 0.861123 \n",
|
||
"144 0.051092 0.864665 \n",
|
||
"62 0.046526 0.867891 \n",
|
||
"61 0.046513 0.871115 \n",
|
||
"95 0.046164 0.874316 \n",
|
||
"244 0.041158 0.877169 \n",
|
||
"245 0.040737 0.879993 \n",
|
||
"102 0.040313 0.882788 \n",
|
||
"90 0.039827 0.885549 \n",
|
||
"91 0.039827 0.888310 \n",
|
||
"41 0.039188 0.891027 \n",
|
||
"393 0.033618 0.893357 \n",
|
||
"450 0.032056 0.895579 \n",
|
||
"4 0.031857 0.897788 \n",
|
||
"215 0.031857 0.899996 \n",
|
||
"175 0.031364 0.902171 \n",
|
||
"235 0.031090 0.904326 \n",
|
||
"252 0.029930 0.906401 \n",
|
||
"223 0.029871 0.908472 \n",
|
||
"92 0.029266 0.910501 \n",
|
||
"13 0.028531 0.912478 \n",
|
||
"162 0.026928 0.914345 \n",
|
||
"282 0.026056 0.916152 \n",
|
||
"38 0.025495 0.917919 \n",
|
||
"5 0.024909 0.919646 \n",
|
||
"410 0.023160 0.921251 \n",
|
||
"222 0.022724 0.922827 \n",
|
||
"198 0.020903 0.924276 \n",
|
||
"249 0.020439 0.925693 \n",
|
||
"39 0.019578 0.927050 \n",
|
||
"110 0.019114 0.928375 \n",
|
||
"194 0.018307 0.929644 \n",
|
||
"196 0.018294 0.930913 \n",
|
||
"59 0.018232 0.932176 \n",
|
||
"473 0.017755 0.933407 \n",
|
||
"20 0.017484 0.934619 \n",
|
||
"6 0.017337 0.935821 \n",
|
||
"278 0.016748 0.936982 \n",
|
||
"250 0.016302 0.938113 \n",
|
||
"248 0.014703 0.939132 \n",
|
||
"242 0.014666 0.940149 \n",
|
||
"445 0.014432 0.941149 \n",
|
||
"205 0.014282 0.942139 \n",
|
||
"19 0.013862 0.943100 \n",
|
||
"279 0.013285 0.944021 \n",
|
||
"325 0.013163 0.944934 \n",
|
||
"177 0.013154 0.945846 \n",
|
||
"96 0.012515 0.946713 \n",
|
||
"129 0.011078 0.947481 \n",
|
||
"251 0.010735 0.948225 \n",
|
||
"88 0.010003 0.948919 \n",
|
||
"21 0.009975 0.949610 \n",
|
||
"431 0.009769 0.950288 \n",
|
||
"142 0.009769 0.950965 \n",
|
||
"323 0.009769 0.951642 "
|
||
]
|
||
},
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"print_main_target('4', 100)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "40fe3676",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"print_main_target('101', 100)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "605cced5-052f-4a99-ac26-020c5d2ab633",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## KPI sur tags"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "916c3e2b-04d3-4877-b894-8f26f10d926e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"customersplus = load_dataset_2(\"4\", \"customersplus\")[['id', 'structure_id']]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"id": "46847b24-15a4-464e-969f-f16ed3653f1f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"structure_tag_mappings = load_dataset_2('4', \"structure_tag_mappings\")[['structure_id', 'tag_id']]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"id": "3c10c69d-735f-453e-96bf-750697d965d0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"19427"
|
||
]
|
||
},
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customersplus[customersplus['structure_id'].notna()]['structure_id'].nunique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "9b0e77b3-5f16-4484-9564-7d3826583418",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"33645"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(customersplus[customersplus['structure_id'].notna()])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "dfa27722-37f9-435a-8221-8aa6f9a4a107",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"3431"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"structure_tag_mappings['structure_id'].nunique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "2daabdd5-31e3-4918-9856-9bbc30cde602",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def tags_information(tenant_id, first_tags):\n",
|
||
"\n",
|
||
" customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']]\n",
|
||
" customersplus.rename(columns = {'id' : 'customer_id'}, inplace = True)\n",
|
||
" tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']]\n",
|
||
" tags.rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'}, inplace = True)\n",
|
||
" structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n",
|
||
" \n",
|
||
" customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n",
|
||
" customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n",
|
||
" \n",
|
||
" nb_customers_with_tag = customer_tags['customer_id'].nunique()\n",
|
||
" \n",
|
||
" print('Nombre de client avec tag : ', nb_customers_with_tag)\n",
|
||
" print('Proportion de clients avec tags : ', nb_customers_with_tag/len(customersplus))\n",
|
||
" print('Moyenne de tags par client : ', len(customer_tags)/nb_customers_with_tag)\n",
|
||
" \n",
|
||
" info = customer_tags.groupby(['tag_id', 'tag_name'])['customer_id'].count().reset_index().sort_values('customer_id', ascending = False).head(first_tags)\n",
|
||
"\n",
|
||
" return info"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "0b9f5f71-a927-4cc8-bb0c-9538e28d3553",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de client avec tag : 13320\n",
|
||
"Proportion de clients avec tags : 0.0877089012682233\n",
|
||
"Moyenne de tags par client : 2.1725975975975977\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>tag_id</th>\n",
|
||
" <th>tag_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11029.0</td>\n",
|
||
" <td>individuels</td>\n",
|
||
" <td>3270</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>11047.0</td>\n",
|
||
" <td>groupes scolaires</td>\n",
|
||
" <td>2417</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>11033.0</td>\n",
|
||
" <td>association</td>\n",
|
||
" <td>2308</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>11028.0</td>\n",
|
||
" <td>structures culturelles</td>\n",
|
||
" <td>2011</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>11051.0</td>\n",
|
||
" <td>etablissement ens scolaire</td>\n",
|
||
" <td>1732</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>11036.0</td>\n",
|
||
" <td>champ social</td>\n",
|
||
" <td>1603</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>43</th>\n",
|
||
" <td>11072.0</td>\n",
|
||
" <td>etab d'enseignement</td>\n",
|
||
" <td>1036</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>11043.0</td>\n",
|
||
" <td>etablissement public</td>\n",
|
||
" <td>935</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>11035.0</td>\n",
|
||
" <td>organisme de tourisme</td>\n",
|
||
" <td>892</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>11045.0</td>\n",
|
||
" <td>centre de loisirs</td>\n",
|
||
" <td>864</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>44</th>\n",
|
||
" <td>11073.0</td>\n",
|
||
" <td>musée, site & fondation</td>\n",
|
||
" <td>786</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>11053.0</td>\n",
|
||
" <td>groupes etudiants</td>\n",
|
||
" <td>758</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11032.0</td>\n",
|
||
" <td>entreprise</td>\n",
|
||
" <td>750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>11039.0</td>\n",
|
||
" <td>etablissement d'enseignement</td>\n",
|
||
" <td>741</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>11034.0</td>\n",
|
||
" <td>asso. culturelle</td>\n",
|
||
" <td>692</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>11044.0</td>\n",
|
||
" <td>administration et collectivité</td>\n",
|
||
" <td>676</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>11046.0</td>\n",
|
||
" <td>tour opérateur</td>\n",
|
||
" <td>642</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>11048.0</td>\n",
|
||
" <td>entreprises</td>\n",
|
||
" <td>515</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>72</th>\n",
|
||
" <td>11619.0</td>\n",
|
||
" <td>structures culturelles;musée, site & fondation</td>\n",
|
||
" <td>427</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>11037.0</td>\n",
|
||
" <td>handicap</td>\n",
|
||
" <td>426</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" tag_id tag_name customer_id\n",
|
||
"1 11029.0 individuels 3270\n",
|
||
"18 11047.0 groupes scolaires 2417\n",
|
||
"4 11033.0 association 2308\n",
|
||
"0 11028.0 structures culturelles 2011\n",
|
||
"22 11051.0 etablissement ens scolaire 1732\n",
|
||
"7 11036.0 champ social 1603\n",
|
||
"43 11072.0 etab d'enseignement 1036\n",
|
||
"14 11043.0 etablissement public 935\n",
|
||
"6 11035.0 organisme de tourisme 892\n",
|
||
"16 11045.0 centre de loisirs 864\n",
|
||
"44 11073.0 musée, site & fondation 786\n",
|
||
"24 11053.0 groupes etudiants 758\n",
|
||
"3 11032.0 entreprise 750\n",
|
||
"10 11039.0 etablissement d'enseignement 741\n",
|
||
"5 11034.0 asso. culturelle 692\n",
|
||
"15 11044.0 administration et collectivité 676\n",
|
||
"17 11046.0 tour opérateur 642\n",
|
||
"19 11048.0 entreprises 515\n",
|
||
"72 11619.0 structures culturelles;musée, site & fondation 427\n",
|
||
"8 11037.0 handicap 426"
|
||
]
|
||
},
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tags_information(\"1\", 20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "bd5bef41-1774-4601-86b5-b7c1aea8f1d2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de client avec tag : 5953\n",
|
||
"Proportion de clients avec tags : 0.021598421025897787\n",
|
||
"Moyenne de tags par client : 1.0\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>tag_id</th>\n",
|
||
" <th>tag_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>training-sb-ax</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" tag_id tag_name customer_id\n",
|
||
"0 1.0 training-sb-ax 5"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tags_information(\"2\", 20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "7c2dc3e6-1418-44db-a8c0-4a9d59ec5232",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>training-sb-ax</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name\n",
|
||
"0 1 training-sb-ax\n",
|
||
"1 2 NaN"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"load_dataset_2(\"2\", \"tags\")[['id', 'name']]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"id": "c7b2c670-7122-4f67-b1aa-8c80a10f16d8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de client avec tag : 23659\n",
|
||
"Proportion de clients avec tags : 0.09207484608139978\n",
|
||
"Moyenne de tags par client : 3.0620482691576143\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>tag_id</th>\n",
|
||
" <th>tag_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>164</th>\n",
|
||
" <td>44539.0</td>\n",
|
||
" <td>*individuel/particulier</td>\n",
|
||
" <td>13148</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>30</th>\n",
|
||
" <td>26926.0</td>\n",
|
||
" <td>ce</td>\n",
|
||
" <td>3216</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>6995.0</td>\n",
|
||
" <td>college</td>\n",
|
||
" <td>2126</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>7028.0</td>\n",
|
||
" <td>lycee</td>\n",
|
||
" <td>1577</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>154</th>\n",
|
||
" <td>44524.0</td>\n",
|
||
" <td>iraiser</td>\n",
|
||
" <td>1453</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6714.0</td>\n",
|
||
" <td>ecole primaire</td>\n",
|
||
" <td>1200</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>155</th>\n",
|
||
" <td>44525.0</td>\n",
|
||
" <td>bp</td>\n",
|
||
" <td>1094</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>7024.0</td>\n",
|
||
" <td>centre de loisirs</td>\n",
|
||
" <td>1080</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>153</th>\n",
|
||
" <td>44515.0</td>\n",
|
||
" <td>entreprise</td>\n",
|
||
" <td>998</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>126</th>\n",
|
||
" <td>44039.0</td>\n",
|
||
" <td>ca fondation d'aumale</td>\n",
|
||
" <td>891</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152</th>\n",
|
||
" <td>44514.0</td>\n",
|
||
" <td>particulier</td>\n",
|
||
" <td>838</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>36</th>\n",
|
||
" <td>43663.0</td>\n",
|
||
" <td>président</td>\n",
|
||
" <td>816</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>76</th>\n",
|
||
" <td>43703.0</td>\n",
|
||
" <td>directeur</td>\n",
|
||
" <td>812</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>158</th>\n",
|
||
" <td>44528.0</td>\n",
|
||
" <td>dc</td>\n",
|
||
" <td>807</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>54</th>\n",
|
||
" <td>43681.0</td>\n",
|
||
" <td>présidente</td>\n",
|
||
" <td>805</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>149</th>\n",
|
||
" <td>44511.0</td>\n",
|
||
" <td>entreprise (financier)</td>\n",
|
||
" <td>805</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>90</th>\n",
|
||
" <td>43718.0</td>\n",
|
||
" <td>conseillère régionale déléguée titulaire</td>\n",
|
||
" <td>804</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>40</th>\n",
|
||
" <td>43667.0</td>\n",
|
||
" <td>directeur de l'agence</td>\n",
|
||
" <td>801</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>78</th>\n",
|
||
" <td>43705.0</td>\n",
|
||
" <td>sous-préfet</td>\n",
|
||
" <td>798</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>100</th>\n",
|
||
" <td>43728.0</td>\n",
|
||
" <td>chargée de mission paysage</td>\n",
|
||
" <td>797</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" tag_id tag_name customer_id\n",
|
||
"164 44539.0 *individuel/particulier 13148\n",
|
||
"30 26926.0 ce 3216\n",
|
||
"14 6995.0 college 2126\n",
|
||
"16 7028.0 lycee 1577\n",
|
||
"154 44524.0 iraiser 1453\n",
|
||
"2 6714.0 ecole primaire 1200\n",
|
||
"155 44525.0 bp 1094\n",
|
||
"15 7024.0 centre de loisirs 1080\n",
|
||
"153 44515.0 entreprise 998\n",
|
||
"126 44039.0 ca fondation d'aumale 891\n",
|
||
"152 44514.0 particulier 838\n",
|
||
"36 43663.0 président 816\n",
|
||
"76 43703.0 directeur 812\n",
|
||
"158 44528.0 dc 807\n",
|
||
"54 43681.0 présidente 805\n",
|
||
"149 44511.0 entreprise (financier) 805\n",
|
||
"90 43718.0 conseillère régionale déléguée titulaire 804\n",
|
||
"40 43667.0 directeur de l'agence 801\n",
|
||
"78 43705.0 sous-préfet 798\n",
|
||
"100 43728.0 chargée de mission paysage 797"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tags_information(\"3\", 20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"id": "76639995-252d-4a58-83d8-c0c00900c3a9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de client avec tag : 10495\n",
|
||
"Proportion de clients avec tags : 0.03271416949025744\n",
|
||
"Moyenne de tags par client : 5.298427822772749\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>tag_id</th>\n",
|
||
" <th>tag_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>147</th>\n",
|
||
" <td>298.0</td>\n",
|
||
" <td>jhima</td>\n",
|
||
" <td>4219</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>146</th>\n",
|
||
" <td>297.0</td>\n",
|
||
" <td>colloque algérie</td>\n",
|
||
" <td>3851</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>142</th>\n",
|
||
" <td>292.0</td>\n",
|
||
" <td>i&ma</td>\n",
|
||
" <td>3826</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>154</th>\n",
|
||
" <td>305.0</td>\n",
|
||
" <td>mardis de la philo</td>\n",
|
||
" <td>3674</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>150</th>\n",
|
||
" <td>301.0</td>\n",
|
||
" <td>le grand continant</td>\n",
|
||
" <td>3670</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144</th>\n",
|
||
" <td>295.0</td>\n",
|
||
" <td>araborama</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>155</th>\n",
|
||
" <td>306.0</td>\n",
|
||
" <td>marie descourtieux</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>145</th>\n",
|
||
" <td>296.0</td>\n",
|
||
" <td>c'était la guerre d'algérie</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>141</th>\n",
|
||
" <td>291.0</td>\n",
|
||
" <td>araborama 3</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>102</th>\n",
|
||
" <td>198.0</td>\n",
|
||
" <td>association de collectivités territoriales spé...</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>143</th>\n",
|
||
" <td>294.0</td>\n",
|
||
" <td>arabofolies</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>103</th>\n",
|
||
" <td>199.0</td>\n",
|
||
" <td>rassemble les 11 000 élus de toute la france a...</td>\n",
|
||
" <td>3669</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>association</td>\n",
|
||
" <td>463</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>54.0</td>\n",
|
||
" <td>collège</td>\n",
|
||
" <td>446</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>49.0</td>\n",
|
||
" <td>ecole</td>\n",
|
||
" <td>374</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>lycée</td>\n",
|
||
" <td>275</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>53.0</td>\n",
|
||
" <td>centre social</td>\n",
|
||
" <td>200</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>53</th>\n",
|
||
" <td>130.0</td>\n",
|
||
" <td>cultures et arts</td>\n",
|
||
" <td>141</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>51.0</td>\n",
|
||
" <td>mairie</td>\n",
|
||
" <td>136</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>64.0</td>\n",
|
||
" <td>formation_ima_ax</td>\n",
|
||
" <td>87</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" tag_id tag_name customer_id\n",
|
||
"147 298.0 jhima 4219\n",
|
||
"146 297.0 colloque algérie 3851\n",
|
||
"142 292.0 i&ma 3826\n",
|
||
"154 305.0 mardis de la philo 3674\n",
|
||
"150 301.0 le grand continant 3670\n",
|
||
"144 295.0 araborama 3669\n",
|
||
"155 306.0 marie descourtieux 3669\n",
|
||
"145 296.0 c'était la guerre d'algérie 3669\n",
|
||
"141 291.0 araborama 3 3669\n",
|
||
"102 198.0 association de collectivités territoriales spé... 3669\n",
|
||
"143 294.0 arabofolies 3669\n",
|
||
"103 199.0 rassemble les 11 000 élus de toute la france a... 3669\n",
|
||
"2 50.0 association 463\n",
|
||
"6 54.0 collège 446\n",
|
||
"1 49.0 ecole 374\n",
|
||
"7 55.0 lycée 275\n",
|
||
"5 53.0 centre social 200\n",
|
||
"53 130.0 cultures et arts 141\n",
|
||
"3 51.0 mairie 136\n",
|
||
"13 64.0 formation_ima_ax 87"
|
||
]
|
||
},
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tags_information(\"4\", 20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "07e91791-d4d4-42b1-ac18-22d3b0b9f7bd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de client avec tag : 532342\n",
|
||
"Proportion de clients avec tags : 0.18660686931118298\n",
|
||
"Moyenne de tags par client : 24.114082676174338\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>tag_id</th>\n",
|
||
" <th>tag_name</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>349.0</td>\n",
|
||
" <td>clients internet</td>\n",
|
||
" <td>517491</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>356.0</td>\n",
|
||
" <td>associations / clubs</td>\n",
|
||
" <td>495520</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>agence de voyages</td>\n",
|
||
" <td>493774</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>32</th>\n",
|
||
" <td>410.0</td>\n",
|
||
" <td>guides conférenciers</td>\n",
|
||
" <td>493378</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>360.0</td>\n",
|
||
" <td>groupe amis ou famille</td>\n",
|
||
" <td>493021</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>354.0</td>\n",
|
||
" <td>ce / entreprises</td>\n",
|
||
" <td>493016</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>association/club</td>\n",
|
||
" <td>493008</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>c.e. / entreprise</td>\n",
|
||
" <td>492656</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>college</td>\n",
|
||
" <td>492552</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>69.0</td>\n",
|
||
" <td>tour operator</td>\n",
|
||
" <td>492549</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>ecole primaire</td>\n",
|
||
" <td>492540</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>31</th>\n",
|
||
" <td>379.0</td>\n",
|
||
" <td>parent goûter anniversaire</td>\n",
|
||
" <td>492468</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>30</th>\n",
|
||
" <td>364.0</td>\n",
|
||
" <td>institutions</td>\n",
|
||
" <td>492364</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>institution</td>\n",
|
||
" <td>492321</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>186.0</td>\n",
|
||
" <td>autocaristes</td>\n",
|
||
" <td>492153</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>enseignement superieur</td>\n",
|
||
" <td>492131</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25</th>\n",
|
||
" <td>359.0</td>\n",
|
||
" <td>hotels / campings</td>\n",
|
||
" <td>492078</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>42</th>\n",
|
||
" <td>7186.0</td>\n",
|
||
" <td>individuel</td>\n",
|
||
" <td>491913</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>groupe amis / famille</td>\n",
|
||
" <td>491900</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>client internet</td>\n",
|
||
" <td>491896</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" tag_id tag_name customer_id\n",
|
||
"20 349.0 clients internet 517491\n",
|
||
"24 356.0 associations / clubs 495520\n",
|
||
"5 10.0 agence de voyages 493774\n",
|
||
"32 410.0 guides conférenciers 493378\n",
|
||
"26 360.0 groupe amis ou famille 493021\n",
|
||
"23 354.0 ce / entreprises 493016\n",
|
||
"8 17.0 association/club 493008\n",
|
||
"1 3.0 c.e. / entreprise 492656\n",
|
||
"6 11.0 college 492552\n",
|
||
"13 69.0 tour operator 492549\n",
|
||
"4 9.0 ecole primaire 492540\n",
|
||
"31 379.0 parent goûter anniversaire 492468\n",
|
||
"30 364.0 institutions 492364\n",
|
||
"2 6.0 institution 492321\n",
|
||
"18 186.0 autocaristes 492153\n",
|
||
"7 13.0 enseignement superieur 492131\n",
|
||
"25 359.0 hotels / campings 492078\n",
|
||
"42 7186.0 individuel 491913\n",
|
||
"3 7.0 groupe amis / famille 491900\n",
|
||
"0 2.0 client internet 491896"
|
||
]
|
||
},
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tags_information(\"101\", 20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "87d131cd-ead0-4ef4-a8ee-b09022d08ffa",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## KPI product"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "26582be9-cfd1-48ea-a0a7-31101fdeb9d1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>supplier_name</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>13070859</td>\n",
|
||
" <td>48187</td>\n",
|
||
" <td>5107462</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>vente en ligne</td>\n",
|
||
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>indiv prog enfant</td>\n",
|
||
" <td>l'école des magiciens</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>2018-12-31 14:15:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>13070860</td>\n",
|
||
" <td>48187</td>\n",
|
||
" <td>5107462</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>vente en ligne</td>\n",
|
||
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>indiv prog enfant</td>\n",
|
||
" <td>l'école des magiciens</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>2018-12-31 14:15:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>13070861</td>\n",
|
||
" <td>48187</td>\n",
|
||
" <td>5107462</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>vente en ligne</td>\n",
|
||
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>indiv prog enfant</td>\n",
|
||
" <td>l'école des magiciens</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>2018-12-31 14:15:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13070862</td>\n",
|
||
" <td>48187</td>\n",
|
||
" <td>5107462</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>vente en ligne</td>\n",
|
||
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>indiv prog enfant</td>\n",
|
||
" <td>l'école des magiciens</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>2018-12-31 14:15:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>13070863</td>\n",
|
||
" <td>48187</td>\n",
|
||
" <td>5107462</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>vente en ligne</td>\n",
|
||
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>indiv prog enfant</td>\n",
|
||
" <td>l'école des magiciens</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>2018-12-31 14:15:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
||
"0 13070859 48187 5107462 4 vente en ligne \n",
|
||
"1 13070860 48187 5107462 4 vente en ligne \n",
|
||
"2 13070861 48187 5107462 4 vente en ligne \n",
|
||
"3 13070862 48187 5107462 4 vente en ligne \n",
|
||
"4 13070863 48187 5107462 4 vente en ligne \n",
|
||
"\n",
|
||
" purchase_date amount is_full_price name_event_types \\\n",
|
||
"0 2018-12-28 14:47:50+00:00 8.0 False spectacle vivant \n",
|
||
"1 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
|
||
"2 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
|
||
"3 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
|
||
"4 2018-12-28 14:47:50+00:00 4.0 False spectacle vivant \n",
|
||
"\n",
|
||
" name_facilities name_categories name_events name_seasons \\\n",
|
||
"0 mucem indiv prog enfant l'école des magiciens 2018 \n",
|
||
"1 mucem indiv prog enfant l'école des magiciens 2018 \n",
|
||
"2 mucem indiv prog enfant l'école des magiciens 2018 \n",
|
||
"3 mucem indiv prog enfant l'école des magiciens 2018 \n",
|
||
"4 mucem indiv prog enfant l'école des magiciens 2018 \n",
|
||
"\n",
|
||
" start_date_time end_date_time open \n",
|
||
"0 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
|
||
"1 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
|
||
"2 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
|
||
"3 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True \n",
|
||
"4 2018-12-31 14:15:00+01:00 1901-01-01 00:09:21+00:09 True "
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tenant_id = \"1\"\n",
|
||
"\n",
|
||
"df_product = display_databases(tenant_id, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
|
||
"\n",
|
||
"df_product.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "533bf499-dd56-4d29-b261-ca1e4928c9c7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>prop_tickets</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>118</th>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" <td>visite générale du mucem (1h30)</td>\n",
|
||
" <td>43814</td>\n",
|
||
" <td>0.024</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>212</th>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>visite autonome scolaires (2h00)</td>\n",
|
||
" <td>34423</td>\n",
|
||
" <td>0.019</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>68</th>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" <td>visite autonome exposition (1h30)</td>\n",
|
||
" <td>26489</td>\n",
|
||
" <td>0.015</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>210</th>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>visite autonome adultes (2h00)</td>\n",
|
||
" <td>22065</td>\n",
|
||
" <td>0.012</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>160</th>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" <td>visites des exterieurs scolaires</td>\n",
|
||
" <td>15595</td>\n",
|
||
" <td>0.009</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>364</th>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>kay ! lettres à un poète disparu</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>443</th>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mauvais genre</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>375</th>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>la madre que parió a la música</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>260</th>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>ali a les yeux bleus (dès 12 ans)</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>484</th>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>rengaine (dès 12 ans)</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>544 rows × 4 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name_event_types name_events ticket_id \\\n",
|
||
"118 offre muséale groupe visite générale du mucem (1h30) 43814 \n",
|
||
"212 offre muséale individuel visite autonome scolaires (2h00) 34423 \n",
|
||
"68 offre muséale groupe visite autonome exposition (1h30) 26489 \n",
|
||
"210 offre muséale individuel visite autonome adultes (2h00) 22065 \n",
|
||
"160 offre muséale groupe visites des exterieurs scolaires 15595 \n",
|
||
".. ... ... ... \n",
|
||
"364 spectacle vivant kay ! lettres à un poète disparu 1 \n",
|
||
"443 spectacle vivant mauvais genre 1 \n",
|
||
"375 spectacle vivant la madre que parió a la música 1 \n",
|
||
"260 spectacle vivant ali a les yeux bleus (dès 12 ans) 1 \n",
|
||
"484 spectacle vivant rengaine (dès 12 ans) 1 \n",
|
||
"\n",
|
||
" prop_tickets \n",
|
||
"118 0.024 \n",
|
||
"212 0.019 \n",
|
||
"68 0.015 \n",
|
||
"210 0.012 \n",
|
||
"160 0.009 \n",
|
||
".. ... \n",
|
||
"364 0.000 \n",
|
||
"443 0.000 \n",
|
||
"375 0.000 \n",
|
||
"260 0.000 \n",
|
||
"484 0.000 \n",
|
||
"\n",
|
||
"[544 rows x 4 columns]"
|
||
]
|
||
},
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"nb_tickets_per_events = df_product.groupby(['name_event_types', 'name_events'])['ticket_id'].count().reset_index().sort_values('ticket_id', ascending = False)\n",
|
||
"nb_tickets_per_events['prop_tickets'] = round(nb_tickets_per_events['ticket_id']/len(df_product), 3)\n",
|
||
"nb_tickets_per_events"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"# Fin travail 25/02"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c437eaec",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"# Exemple sur Company 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a1c1fc39",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Chargement données"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "66f8c17b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"BUCKET = \"bdc2324-data/1\"\n",
|
||
"liste_database = fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "c08e6798",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n",
|
||
"\n",
|
||
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
|
||
"liste_database_filtered = [element for element in liste_database if any(element_part in element for element_part in liste_database_select)]\n",
|
||
"\n",
|
||
"# Afficher le résultat\n",
|
||
"print(liste_database_filtered)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "675f518d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_445/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
"  df = pd.read_csv(file_in)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Build one DataFrame per CSV file listed in the bucket and expose it as a global named df<tenant>_<table>\n",
|
||
"files_path = liste_database\n",
|
||
"\n",
|
||
"client_number = files_path[0].split(\"/\")[1]\n",
|
||
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
||
"\n",
|
||
"# Paths end with /<digits>/<digits><table>.csv ; group(3) is the table name, so the variables are named df1xxx\n",
|
||
"table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
|
||
"\n",
|
||
"for current_path in files_path:\n",
|
||
"    name_match = table_name_pattern.search(current_path)\n",
|
||
"    if name_match is None:\n",
|
||
"        # Path that is not a table export: skip it instead of failing on None.group()\n",
|
||
"        continue\n",
|
||
"    with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
||
"        df = pd.read_csv(file_in)\n",
|
||
"    nom_dataframe = df_prefix + name_match.group(3)\n",
|
||
"    globals()[nom_dataframe] = df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e855f403",
|
||
"metadata": {},
|
||
"source": [
|
||
"## customersplus.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "91a8f8c4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# DataFrame.info() prints its summary and returns None, so there is nothing to wrap in a DataFrame or to assign\n",
|
||
"df1_customersplus.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "2fda171d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def info_colonnes_dataframe(df):\n",
|
||
"    \"\"\"\n",
|
||
"    Summarise the columns of a DataFrame.\n",
|
||
"\n",
|
||
"    Parameters:\n",
|
||
"    - df: DataFrame\n",
|
||
"        The DataFrame to describe.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"    - DataFrame\n",
|
||
"        One row per column of df, with 'Nom_colonne' (column name),\n",
|
||
"        'Type_colonne' (dtype as a string) and 'Taux_NA' (percentage of missing values).\n",
|
||
"    \"\"\"\n",
|
||
"    # Built column-wise so that a frame without any column still yields the three expected headers\n",
|
||
"    return pd.DataFrame({\n",
|
||
"        'Nom_colonne': list(df.columns),\n",
|
||
"        'Type_colonne': [str(dtype) for dtype in df.dtypes],\n",
|
||
"        'Taux_NA': (df.isna().mean() * 100).tolist()\n",
|
||
"    })"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"id": "205eeeab",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def cleaning_date(df, column_name):\n",
|
||
"    \"\"\"\n",
|
||
"    Convert one column of a DataFrame to UTC datetimes, reading the values with the ISO8601 format.\n",
|
||
"\n",
|
||
"    Parameters:\n",
|
||
"    - df: DataFrame\n",
|
||
"        The DataFrame holding the column to convert; the column is overwritten in this object.\n",
|
||
"    - column_name: str\n",
|
||
"        Name of the column to convert.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"    - DataFrame\n",
|
||
"        The same DataFrame object, with the column converted.\n",
|
||
"    \"\"\"\n",
|
||
"    parsed_dates = pd.to_datetime(df[column_name], format = 'ISO8601', utc = True)\n",
|
||
"    df[column_name] = parsed_dates\n",
|
||
"    return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "634282c5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"a = info_colonnes_dataframe(df1_customersplus)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "0e8d4133",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"a"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "1268ad5a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"a = pd.DataFrame(df1_customersplus.isna().sum()/len(df1_customersplus)*100)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"id": "bd41dc80",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Variable selection, done on a copy of df1_customersplus\n",
|
||
"df1_customersplus_clean = df1_customersplus.copy()\n",
|
||
"\n",
|
||
"# NOTE(review): last_visiting_date is parsed here and dropped just below - confirm whether it should be kept\n",
|
||
"for date_column in ('first_buying_date', 'last_visiting_date'):\n",
|
||
"    cleaning_date(df1_customersplus_clean, date_column)\n",
|
||
"\n",
|
||
"columns_to_drop = ['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date']\n",
|
||
"df1_customersplus_clean = (\n",
|
||
"    df1_customersplus_clean\n",
|
||
"    .drop(columns = columns_to_drop)\n",
|
||
"    .rename(columns = {'id' : 'customer_id'})\n",
|
||
")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "64d0f76b",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## tickets.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "7e683711",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_tickets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "e7b9a52e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_tickets.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "568280e8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_tickets.isna().sum()/len(df1_tickets)*100"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "29ecec90",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Variable selection\n",
|
||
"# drop() is called without inplace so that it returns the new frame (with inplace=True it returns None) and df1_tickets stays intact\n",
|
||
"# NOTE(review): this list looks copied from the customers cell; errors='ignore' skips the names absent from tickets - confirm the intended columns\n",
|
||
"df1_tickets_clean = df1_tickets.drop(columns = ['lastname', 'firstname', 'email', 'created_at', 'updated_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode'], errors = 'ignore')\n",
|
||
"# The id of this table identifies a ticket; calling it customer_id would clash with the customer_id column of purchases when the tables are merged\n",
|
||
"df1_tickets_clean = df1_tickets_clean.rename(columns = {'id' : 'ticket_id'})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "22bb5de4",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## suppliers.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "6a9a91f4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_suppliers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "bab4758a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_suppliers.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "b5fff251",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_suppliers.isna().sum()/len(df1_suppliers)*100"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "8b09e2a3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Variable selection\n",
|
||
"# rename() returns a new frame: no inplace rename on a slice of df1_suppliers, hence no SettingWithCopyWarning\n",
|
||
"df1_suppliers_clean = df1_suppliers[['id', 'name']].rename(columns = {'name' : 'supplier_name'})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "ecee7cdc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_suppliers_clean"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c8e6e69b",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## type_ofs.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "1a6cff1f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_type_ofs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "93630b41",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_type_ofs.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "4f94481a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Variable selection\n",
|
||
"# rename() returns a new frame: no inplace rename on a slice of df1_type_ofs, hence no SettingWithCopyWarning\n",
|
||
"df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']].rename(columns = {'name' : 'type_of_ticket_name'})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1b2811e2",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## purchases.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "2455d2e1",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_purchases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "5f9a159d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_purchases.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "db201bf7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Cleaning of purchase_date\n",
|
||
"# Single pass: the ISO8601 format has to be given while the strings are parsed; a second to_datetime on an already converted column changes nothing\n",
|
||
"df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], utc = True, format = 'ISO8601')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "bd436fca",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_purchases.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "83435862",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Selection des variables\n",
|
||
"df1_purchases_clean = df1_purchases[['id', 'purchase_date', 'customer_id']]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f210e730",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## Fusion de l'ensemble des données de billetterie"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "1f8b3aa7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Fusion avec fournisseurs\n",
|
||
"df1_ticket_information = pd.merge(df1_tickets_clean, df1_suppliers_clean, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
|
||
"df1_ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
|
||
"\n",
|
||
"# Fusion avec type de tickets\n",
|
||
"df1_ticket_information = pd.merge(df1_ticket_information, df1_type_ofs_clean, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
|
||
"df1_ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
|
||
"\n",
|
||
"# Fusion avec achats\n",
|
||
"df1_ticket_information = pd.merge(df1_ticket_information, df1_purchases_clean, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
|
||
"df1_ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "83a4d021",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_ticket_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "56e6ebd1",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"# Utilisation de fonctions"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"id": "88fcde4b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Créer un DataFrame exemple\n",
|
||
"df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n",
|
||
"\n",
|
||
"# Appliquer la fonction pour nettoyer la colonne 'purchase_date' de manière vectorisée\n",
|
||
"df_clean = cleaning_date(df_not_clean, 'opened_at')\n",
|
||
"df_clean.rename(columns = {'opened_at' : 'opened_at_clean'}, inplace = True)\n",
|
||
"\n",
|
||
"test = pd.concat([df1_campaign_stats[['opened_at']].head(20), df_clean], axis=1)\n",
|
||
"\n",
|
||
"test.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "818f69db",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Nettoyage, selection et fusion"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "c9654eda",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_ticket_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "7f2b620c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_ticket_information.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "637bdb72",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Customer information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "14c52894",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Target area"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "d83abfbf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_15285/2625134041.py:3: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Target.csv cleaning\n",
|
||
"df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]]\n",
|
||
"df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n",
|
||
"\n",
|
||
"# target_type cleaning\n",
|
||
"df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
|
||
"\n",
|
||
"#customer_target_mappings cleaning\n",
|
||
"df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
|
||
"\n",
|
||
"# Merge target et target_type\n",
|
||
"df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, left_on='target_type_id', right_on='target_type_id', how='inner')\n",
|
||
"df1_targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n",
|
||
"\n",
|
||
"# Merge\n",
|
||
"df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, left_on='target_id', right_on='target_id', how='inner')\n",
|
||
"df1_targets_full.drop(['target_id'], axis = 1, inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"id": "90d71b2c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n",
|
||
"len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n",
|
||
"\n",
|
||
"# 99,6% des 151 000 client visés sont catégorisés plusieurs fois et en moyenne 5 fois... \n",
|
||
"df1_targets_test.mean()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "2301de1e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>target_type_is_import</th>\n",
|
||
" <th>target_type_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1184824</td>\n",
|
||
" <td>645400</td>\n",
|
||
" <td>DDCP PROMO Réseau livres</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>210571</td>\n",
|
||
" <td>2412</td>\n",
|
||
" <td>DDCP PROMO Réseau livres</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>210572</td>\n",
|
||
" <td>4536</td>\n",
|
||
" <td>DDCP PROMO Réseau livres</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>210573</td>\n",
|
||
" <td>6736</td>\n",
|
||
" <td>DDCP PROMO Réseau livres</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>210574</td>\n",
|
||
" <td>38210</td>\n",
|
||
" <td>DDCP PROMO Réseau livres</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_name target_type_is_import \\\n",
|
||
"0 1184824 645400 DDCP PROMO Réseau livres False \n",
|
||
"1 210571 2412 DDCP PROMO Réseau livres False \n",
|
||
"2 210572 4536 DDCP PROMO Réseau livres False \n",
|
||
"3 210573 6736 DDCP PROMO Réseau livres False \n",
|
||
"4 210574 38210 DDCP PROMO Réseau livres False \n",
|
||
"\n",
|
||
" target_type_name \n",
|
||
"0 manual_static_filter \n",
|
||
"1 manual_static_filter \n",
|
||
"2 manual_static_filter \n",
|
||
"3 manual_static_filter \n",
|
||
"4 manual_static_filter "
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df1_targets_full.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "75fbc2f7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
|
||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||
"[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
|
||
"[nltk_data] Package stopwords is already up-to-date!\n",
|
||
"[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
|
||
"[nltk_data] Package wordnet is already up-to-date!\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"True"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Catégorisation des target_name\n",
|
||
"import pandas as pd\n",
|
||
"import nltk\n",
|
||
"from nltk.tokenize import word_tokenize\n",
|
||
"from nltk.corpus import stopwords\n",
|
||
"from nltk.stem import WordNetLemmatizer\n",
|
||
"from nltk.probability import FreqDist\n",
|
||
"\n",
|
||
"# Téléchargement des ressources nécessaires\n",
|
||
"nltk.download('punkt')\n",
|
||
"nltk.download('stopwords')\n",
|
||
"nltk.download('wordnet')\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "55cddf92",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Mots les plus fréquents:\n",
|
||
"consentement: 550777\n",
|
||
"optin: 463579\n",
|
||
"jeune: 155103\n",
|
||
"public: 155103\n",
|
||
"mediation: 150001\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Définition des fonctions de tokenisation, suppression des mots vides et lemmatisation\n",
|
||
"def preprocess_text(texte):\n",
|
||
" # Concaténation des éléments de la liste en une seule chaîne de caractères\n",
|
||
" texte_concat = ' '.join(texte)\n",
|
||
" \n",
|
||
" # Tokenisation des mots\n",
|
||
" tokens = word_tokenize(texte_concat.lower())\n",
|
||
" \n",
|
||
" # Suppression des mots vides (stopwords)\n",
|
||
" stop_words = set(stopwords.words('french'))\n",
|
||
" filtered_tokens = [word for word in tokens if word not in stop_words]\n",
|
||
" \n",
|
||
" # Lemmatisation des mots\n",
|
||
" lemmatizer = WordNetLemmatizer()\n",
|
||
" lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
|
||
" \n",
|
||
" return lemmatized_tokens\n",
|
||
"\n",
|
||
"\n",
|
||
"# Appliquer le prétraitement à la colonne de texte\n",
|
||
"df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n",
|
||
"\n",
|
||
"# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n",
|
||
"all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n",
|
||
"\n",
|
||
"# Calculer la fréquence des mots\n",
|
||
"freq_dist = FreqDist(all_words)\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "7fd98a85",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Mots les plus fréquents:\n",
|
||
"consentement: 550777\n",
|
||
"optin: 463579\n",
|
||
"jeune: 155103\n",
|
||
"public: 155103\n",
|
||
"mediation: 150001\n",
|
||
"specialisee: 150001\n",
|
||
"b2c: 143432\n",
|
||
"optout: 97683\n",
|
||
"newsletter: 56022\n",
|
||
"(: 46084\n",
|
||
"): 46084\n",
|
||
"inscrits: 42296\n",
|
||
"nl: 42294\n",
|
||
"générale: 41037\n",
|
||
"generale: 40950\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Affichage des mots les plus fréquents\n",
|
||
"print(\"Mots les plus fréquents:\")\n",
|
||
"for mot, freq in freq_dist.most_common(15):\n",
|
||
" print(f\"{mot}: {freq}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "cf94bb1d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" texte \\\n",
|
||
"0 Le chat noir mange une souris. \n",
|
||
"1 Le chien blanc aboie. \n",
|
||
"\n",
|
||
" texte_preprocessed \n",
|
||
"0 [e, h, a, o, i, r, a, g, e, u, e, o, u, r, i, .] \n",
|
||
"1 [e, h, i, e, b, a, a, b, o, i, e, .] \n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
|
||
"[nltk_data] Package punkt is already up-to-date!\n",
|
||
"[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
|
||
"[nltk_data] Package stopwords is already up-to-date!\n",
|
||
"[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
|
||
"[nltk_data] Package wordnet is already up-to-date!\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import nltk\n",
|
||
"from nltk.tokenize import word_tokenize\n",
|
||
"from nltk.corpus import stopwords\n",
|
||
"from nltk.stem import WordNetLemmatizer\n",
|
||
"\n",
|
||
"# Téléchargement des ressources nécessaires\n",
|
||
"nltk.download('punkt')\n",
|
||
"nltk.download('stopwords')\n",
|
||
"nltk.download('wordnet')\n",
|
||
"\n",
|
||
"# Création de la DataFrame d'exemple\n",
|
||
"data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n",
|
||
"df = pd.DataFrame(data)\n",
|
||
"\n",
|
||
"# Fonction pour prétraiter le texte\n",
|
||
"def preprocess_text(texte):\n",
|
||
" # Concaténation des éléments de la liste en une seule chaîne de caractères\n",
|
||
" texte_concat = ' '.join(texte)\n",
|
||
" \n",
|
||
" # Tokenisation des mots\n",
|
||
" tokens = word_tokenize(texte_concat.lower())\n",
|
||
" \n",
|
||
" # Suppression des mots vides (stopwords)\n",
|
||
" stop_words = set(stopwords.words('french'))\n",
|
||
" filtered_tokens = [word for word in tokens if word not in stop_words]\n",
|
||
" \n",
|
||
" # Lemmatisation des mots\n",
|
||
" lemmatizer = WordNetLemmatizer()\n",
|
||
" lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
|
||
" \n",
|
||
" return lemmatized_tokens\n",
|
||
"\n",
|
||
"# Appliquer la fonction de prétraitement à la colonne de texte\n",
|
||
"df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n",
|
||
"\n",
|
||
"# Afficher le résultat\n",
|
||
"print(df)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "711d3884",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## Campaign area"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"id": "c25b5295",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# campaign_stats cleaning \n",
|
||
"df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
|
||
"cleaning_date(df1_campaign_stats_clean, 'opened_at')\n",
|
||
"cleaning_date(df1_campaign_stats_clean, 'sent_at')\n",
|
||
"cleaning_date(df1_campaign_stats_clean, 'delivered_at')\n",
|
||
"\n",
|
||
"# campaigns cleaning\n",
|
||
"df1_campaigns_clean = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
|
||
"cleaning_date(df1_campaigns_clean, 'campaign_sent_at')\n",
|
||
"\n",
|
||
"# Merge \n",
|
||
"df1_campaigns_full = pd.merge(df1_campaign_stats_clean, df1_campaigns_clean, on = \"campaign_id\", how = \"left\")\n",
|
||
"df1_campaigns_full.drop(['campaign_id'], axis = 1, inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "2a3de6a5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_campaigns_full.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"id": "3fc1f446",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_campaigns_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "20e69ee3",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## Link area"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "d9cbdbce",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_campaigns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"id": "c07459f0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_link_stats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "80ae4c42",
|
||
"metadata": {
|
||
"jp-MarkdownHeadingCollapsed": true
|
||
},
|
||
"source": [
|
||
"## Supplier"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "b50b8f95",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Fonction d'exploration pour suppliers.csv = label itr et commission inconnues\n",
|
||
"def suppliers_exploration(suppliers = None) : \n",
|
||
" \n",
|
||
" # Taux de NaN pour ces colonnes\n",
|
||
" label_na = suppliers['label'].isna().sum()/len(suppliers)*100\n",
|
||
" itr_na = suppliers['itr'].isna().sum()/len(suppliers)*100\n",
|
||
" commission_na = suppliers['commission'].isna().sum()/len(suppliers)*100\n",
|
||
"\n",
|
||
" suppliers_desc = pd.DataFrame({'nb_suppliers' : [suppliers['name'].nunique()],\n",
|
||
" 'label_na' : [label_na],\n",
|
||
" 'itr_na' : [itr_na],\n",
|
||
" 'commission_na' : [commission_na]})\n",
|
||
"\n",
|
||
" return suppliers_desc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "7e292935",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df1_suppliers_desc = suppliers_exploration(suppliers = df1_suppliers)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "05b6f2b0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>label_na</th>\n",
|
||
" <th>itr_na</th>\n",
|
||
" <th>commission_na</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_suppliers label_na itr_na commission_na\n",
|
||
"0 9 100.0 100.0 100.0"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df1_suppliers_desc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "c9324d80",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"BUCKET = \"bdc2324-data\"\n",
|
||
"liste_folders = fs.ls(BUCKET)\n",
|
||
"\n",
|
||
"liste_files = []\n",
|
||
"for company_folder in liste_folders : \n",
|
||
" liste_files.extend(fs.ls(company_folder))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "10304058",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['bdc2324-data/1/1suppliers.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/9/9suppliers.csv']\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"liste_database_select = ['suppliers']\n",
|
||
"\n",
|
||
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
|
||
"liste_suppliers = [element for element in liste_files if any(element_part in element for element_part in liste_database_select)]\n",
|
||
"\n",
|
||
"# Afficher le résultat\n",
|
||
"print(liste_suppliers)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "ffa423e5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# loop to create dataframes from file 2\n",
|
||
"def database_loading(database_name = None):\n",
|
||
" files_path = database_name\n",
|
||
" \n",
|
||
" client_number = files_path.split(\"/\")[1]\n",
|
||
" df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
||
" \n",
|
||
" current_path = files_path\n",
|
||
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
||
" df = pd.read_csv(file_in)\n",
|
||
"\n",
|
||
" return df, client_number"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "70bdc88d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "6a0f567d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_all = pd.DataFrame()\n",
|
||
"\n",
|
||
"for link in liste_suppliers:\n",
|
||
" \n",
|
||
" df_supplier, tenant_id = database_loading(link)\n",
|
||
" \n",
|
||
" df_supplier['tenant_id'] = int(tenant_id)\n",
|
||
"\n",
|
||
" df_all = pd.concat([df_all, df_supplier], axis = 0)\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"id": "1522d8cd",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# df_all[df_all['tenant_id'] == 101]['name'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"id": "b0e42a61",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] \n",
|
||
"# vad = vente à distance\n",
|
||
"df_all['name'] = df_all['name'].fillna('')\n",
|
||
"\n",
|
||
"df_all['canal_vente_internet'] = df_all['name'].str.contains('|'.join(liste_mots), case=False).astype(int)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"id": "d299ae91",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"tenant_id\n",
|
||
"1 1\n",
|
||
"2 1\n",
|
||
"3 1\n",
|
||
"4 1\n",
|
||
"5 1\n",
|
||
"6 1\n",
|
||
"7 1\n",
|
||
"8 1\n",
|
||
"9 1\n",
|
||
"10 1\n",
|
||
"11 1\n",
|
||
"12 1\n",
|
||
"13 1\n",
|
||
"14 1\n",
|
||
"101 1\n",
|
||
"Name: canal_vente_internet, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_all.groupby('tenant_id')['canal_vente_internet'].max()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|