BDC-team-1/Exploration_billet_AJ.ipynb

6008 lines
208 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "5bf5c226",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
2024-02-26 22:47:36 +01:00
"execution_count": 1,
2024-02-10 22:46:56 +01:00
"id": "b1a5b9d3",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
2024-01-13 10:38:10 +01:00
"import numpy as np\n",
"import os\n",
"import s3fs\n",
2024-02-25 23:53:10 +01:00
"import re\n",
"import warnings"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "ecfa2219",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
2024-02-26 22:47:36 +01:00
"execution_count": 2,
2024-02-10 22:46:56 +01:00
"id": "1a094277",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
2024-01-13 10:38:10 +01:00
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
]
},
2024-02-25 18:33:24 +01:00
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 3,
2024-02-25 18:33:24 +01:00
"id": "30d77451-2df6-4c07-8b15-66e0e990ff03",
"metadata": {},
"outputs": [],
"source": [
"# The S3 filesystem object `fs` is created in the previous cell.\n",
"\n",
"# Load the shared cleaning/merge and KPI helpers into the notebook namespace.\n",
"# exec() is kept so the helper names land in globals exactly as before; each\n",
"# script is opened in a `with` block so its file handle is closed right away.\n",
"for helper_script in ('0_Cleaning_and_merge_functions.py', '0_KPI_functions.py'):\n",
"    with open(helper_script) as script_file:\n",
"        exec(script_file.read())\n",
"\n",
"# Silence every warning for the rest of the session\n",
"warnings.filterwarnings('ignore')\n"
]
},
{
"cell_type": "code",
2024-02-26 22:47:36 +01:00
"execution_count": 4,
2024-02-25 18:33:24 +01:00
"id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39",
"metadata": {},
"outputs": [],
"source": [
"def load_dataset_2(directory_path, file_name):\n",
"    \"\"\"\n",
"    Read one raw csv table of a company from the `bdc2324-data` bucket.\n",
"\n",
"    The object key is bdc2324-data/<directory_path>/<directory_path><file_name>.csv\n",
"    and is opened through the notebook-level `fs` filesystem. The `identifier`\n",
"    column is removed from the returned DataFrame when the table has one.\n",
"    \"\"\"\n",
"    csv_key = \"/\".join([\"bdc2324-data\", directory_path, directory_path + file_name + \".csv\"])\n",
"    with fs.open(csv_key, mode=\"rb\") as handle:\n",
"        table = pd.read_csv(handle, sep=\",\")\n",
"\n",
"    # Nothing to strip when the table carries no identifier column\n",
"    if 'identifier' not in table.columns:\n",
"        return table\n",
"    return table.drop(columns = 'identifier')"
]
},
{
2024-03-13 23:24:38 +01:00
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 5,
2024-03-13 23:24:38 +01:00
"id": "31ab76f0-fbb1-46f6-b359-97228620c207",
2024-03-04 23:30:25 +01:00
"metadata": {},
2024-03-13 23:24:38 +01:00
"outputs": [],
2024-02-25 18:33:24 +01:00
"source": [
2024-03-13 23:24:38 +01:00
"def export_in_temporary(df, output_name):\n",
"    \"\"\"\n",
"    Write `df` as <output_name>.csv (without the index) in the\n",
"    projet-bdc2324-team1/Temporary folder, through the notebook-level `fs`.\n",
"    \"\"\"\n",
"    print('Export of dataset :', output_name)\n",
"    destination = \"/\".join([\"projet-bdc2324-team1/Temporary\", output_name + '.csv'])\n",
"    with fs.open(destination, 'w') as csv_out:\n",
"        df.to_csv(csv_out, index = False)"
2024-02-25 18:33:24 +01:00
]
},
{
2024-03-13 23:24:38 +01:00
"cell_type": "markdown",
"id": "ccf597b0-b459-4ea5-baf0-5ba8c90915e4",
2024-02-25 18:33:24 +01:00
"metadata": {},
"source": [
2024-03-13 23:24:38 +01:00
"# Cleaning target area and tags"
2024-02-25 18:33:24 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "28316e1d-7892-4506-9d53-0695e71aa7bc",
2024-02-25 18:33:24 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-25 18:33:24 +01:00
"source": [
2024-03-13 23:24:38 +01:00
"target_example = preprocessing_target_area('1')"
2024-02-25 18:33:24 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 6,
2024-03-13 23:24:38 +01:00
"id": "fd88e294-e038-4cec-ad94-2bbbc10a4059",
2024-02-25 18:33:24 +01:00
"metadata": {},
2024-03-13 23:24:38 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-23 10:48:47 +01:00
" <th>target_jeune</th>\n",
" <th>target_optin</th>\n",
" <th>target_optout</th>\n",
" <th>target_scolaire</th>\n",
" <th>target_entreprise</th>\n",
" <th>target_famille</th>\n",
" <th>target_newsletter</th>\n",
" </tr>\n",
" <tr>\n",
2024-03-13 23:24:38 +01:00
" <th>customer_id</th>\n",
2024-03-23 10:48:47 +01:00
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-23 10:48:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-23 10:48:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-23 10:48:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2024-03-23 10:48:47 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>5</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-23 10:48:47 +01:00
" target_jeune target_optin target_optout target_scolaire \\\n",
"customer_id \n",
"1 1 1 1 1 \n",
"2 1 1 1 1 \n",
"3 1 1 0 0 \n",
"4 1 1 0 0 \n",
"5 1 1 0 0 \n",
2024-03-13 23:24:38 +01:00
"\n",
2024-03-23 10:48:47 +01:00
" target_entreprise target_famille target_newsletter \n",
"customer_id \n",
"1 1 0 0 \n",
"2 1 0 1 \n",
"3 0 0 1 \n",
"4 0 0 0 \n",
"5 0 0 0 "
2024-03-13 23:24:38 +01:00
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 6,
2024-03-13 23:24:38 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-02-25 18:33:24 +01:00
"source": [
2024-03-13 23:24:38 +01:00
"tenant_id = '1'\n",
"\n",
"# Lower-case keywords whose presence in a target name flags the matching category.\n",
"# NOTE(review): short keywords such as 'nl' or 'cse' are searched anywhere inside the\n",
"# name (plain substring match), so they may flag unrelated targets - confirm.\n",
"TARGET_KEYWORDS = {\n",
"    'target_jeune': ['jeune', 'pass_culture', 'etudiant', '12-25 ans', 'student', 'jeunesse'],\n",
"    'target_optin': ['optin', 'opt-in'],\n",
"    'target_optout': ['optout', 'unsubscribed'],\n",
"    'target_scolaire': ['scolaire', 'enseignant', 'chercheur', 'schulen', 'école'],\n",
"    'target_entreprise': ['b2b', 'btob', 'cse'],\n",
"    'target_famille': ['famille', 'enfants', 'family'],\n",
"    'target_newsletter': ['nl', 'newsletter'],\n",
"}\n",
"\n",
"\n",
"def concatenate_names(names):\n",
"    \"\"\"Join the non-missing values of `names` into one ', '-separated string.\"\"\"\n",
"    # A missing value (NaN) is not a string and would make str.join raise\n",
"    return ', '.join(str(name) for name in names if pd.notna(name))\n",
"\n",
"\n",
"target_example = display_input_databases(tenant_id, \"target_information\")\n",
"\n",
"# Missing names become '' so they match no keyword; matching is done on lower case\n",
"target_example['target_name'] = target_example['target_name'].fillna('').str.lower()\n",
"\n",
"# One 0/1 column per category: 1 when the target name contains any of its keywords.\n",
"# Keywords are escaped so they are always matched literally.\n",
"for flag_column, keywords in TARGET_KEYWORDS.items():\n",
"    keyword_pattern = '|'.join(re.escape(keyword) for keyword in keywords)\n",
"    target_example[flag_column] = target_example['target_name'].str.contains(keyword_pattern, case=False).astype(int)\n",
"\n",
"# Per-customer summary: number of distinct targets and the concatenated labels\n",
"target_agg = target_example.groupby('customer_id').agg(\n",
"    nb_targets=('target_name', 'nunique'),\n",
"    all_targets=('target_name', concatenate_names),\n",
"    all_target_types=('target_type_name', concatenate_names)\n",
").reset_index()\n",
"\n",
"# A customer belongs to a category as soon as one of its targets is flagged\n",
"target_example_categorie = target_example.groupby('customer_id')[list(TARGET_KEYWORDS)].max()\n",
"\n",
"target_example_categorie.head()"
2024-02-25 18:33:24 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "c75efea3-b5e8-4a7a-bed4-dd64ae9ff9f2",
2024-02-25 18:33:24 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-25 18:33:24 +01:00
"source": [
2024-03-23 10:48:47 +01:00
"#export_in_temporary(target_agg, 'Target_kpi_concatenate')"
2024-02-25 18:33:24 +01:00
]
},
2024-03-14 23:35:25 +01:00
{
2024-03-23 10:48:47 +01:00
"cell_type": "markdown",
"id": "5d91263e-8a97-4cb1-8d94-db8ab0b77cdf",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-03-14 23:35:25 +01:00
"source": [
2024-03-23 10:48:47 +01:00
"# Brouillon"
2024-03-14 23:35:25 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5e864b1-adad-4267-b956-3f7ef371d677",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def display_covering_time(df, company, datecover):\n",
"    \"\"\"\n",
"    Record and print the purchase-date coverage of one company.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : DataFrame with a datetime-like `purchase_date` column.\n",
"    company : key under which the coverage is stored.\n",
"    datecover : dict, updated in place, mapping company -> list of covered days.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The same `datecover` dict; `datecover[company]` holds one midnight datetime\n",
"    per calendar day from the first to the last purchase day, both included.\n",
"    \"\"\"\n",
"    # Imported here so the function does not rely on names provided by other cells\n",
"    from datetime import datetime, timedelta\n",
"\n",
"    min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
"    max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
"    # Going through the day string drops the time of day\n",
"    first_day = datetime.strptime(min_date, \"%Y-%m-%d\")\n",
"    last_day = datetime.strptime(max_date, \"%Y-%m-%d\")\n",
"    # + 1 so that the last purchase day belongs to the coverage printed below\n",
"    nb_days = (last_day - first_day).days + 1\n",
"    datecover[company] = [first_day + timedelta(days=offset) for offset in range(nb_days)]\n",
"    print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
"    return datecover\n",
"\n",
"\n",
"def compute_time_intersection(datecover):\n",
"    \"\"\"\n",
"    Return the days covered by every company, as sorted 'YYYY-MM-DD' strings.\n",
"\n",
"    `datecover` maps each company to its list of covered days (objects with a\n",
"    strftime method). An empty mapping yields an empty list.\n",
"    \"\"\"\n",
"    day_sets = [set(days) for days in datecover.values()]\n",
"    # set.intersection() needs at least one set to work on\n",
"    if not day_sets:\n",
"        return []\n",
"    common_days = set.intersection(*day_sets)\n",
"    return sorted(day.strftime(\"%Y-%m-%d\") for day in common_days)\n",
"\n",
"\n",
"def df_coverage_modelization(sport, coverage_features = 0.7):\n",
"    \"\"\"\n",
"    Return start_date, end_of_features and final_date, the three dates that\n",
"    help to construct the train and test datasets.\n",
"\n",
"    sport : iterable of company identifiers; the `products_purchased_reduced`\n",
"        table of each one is loaded with display_input_databases (defined\n",
"        outside this cell).\n",
"    coverage_features : share (between 0 and 1) of the common coverage that\n",
"        lies before end_of_features.\n",
"\n",
"    The three dates are 'YYYY-MM-DD' strings picked among the days covered by\n",
"    every company.\n",
"    \"\"\"\n",
"    datecover = {}\n",
"    for company in sport:\n",
"        df_products_purchased_reduced = display_input_databases(company, file_name = \"products_purchased_reduced\",\n",
"                                                                 datetime_col = ['purchase_date'])\n",
"        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
"    dt_coverage = compute_time_intersection(datecover)\n",
"    start_date = dt_coverage[0]\n",
"    # Honour the requested share instead of a fixed 0.7; the index is capped so\n",
"    # that coverage_features = 1 points at the last common day\n",
"    split_index = min(int(coverage_features * len(dt_coverage)), len(dt_coverage) - 1)\n",
"    end_of_features = dt_coverage[split_index]\n",
"    final_date = dt_coverage[-1]\n",
"    return start_date, end_of_features, final_date\n",
" "
]
},
2024-02-25 18:33:24 +01:00
{
"cell_type": "markdown",
2024-03-13 23:24:38 +01:00
"id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
2024-02-26 22:47:36 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-02-25 18:33:24 +01:00
"source": [
2024-03-13 23:24:38 +01:00
"# Bases non communes : mise à plat"
2024-02-25 23:53:10 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384",
2024-02-25 23:53:10 +01:00
"metadata": {},
"outputs": [],
"source": [
2024-03-13 23:24:38 +01:00
"# Company identifiers grouped by sector\n",
"companies = {'musee' : ['1', '2', '3', '4'],\n",
"             'sport': ['5', '6', '7', '8', '9'],\n",
"             'musique' : ['10', '11', '12', '13', '14']}\n",
"\n",
"# Flat list derived from the groups above so that both always stay in sync\n",
"all_companies = [company for sector_companies in companies.values() for company in sector_companies]"
2024-02-25 23:53:10 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "35ac004f-c191-4f45-a4b1-6d993d9ec38c",
2024-02-25 23:53:10 +01:00
"metadata": {},
"outputs": [],
"source": [
2024-03-13 23:24:38 +01:00
"# One record per company: its number plus a 1 for every table found in its folder\n",
"company_records = []\n",
"\n",
"for i in all_companies:\n",
"    company_record = {'company_number': i}\n",
"\n",
"    BUCKET = \"bdc2324-data/\"+i\n",
"    for base in fs.ls(BUCKET):\n",
"        match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
"        if match:\n",
"            # group(3) is the table name that follows the numeric prefix of the file\n",
"            nom_base = match.group(3)\n",
"            company_record[nom_base] = 1\n",
"\n",
"    company_records.append(company_record)\n",
"\n",
"# Build the frame once instead of growing it with pd.concat at every iteration;\n",
"# a table that a company does not have shows up as NaN, and each row gets its own index\n",
"companies_databases = pd.DataFrame(company_records)"
2024-02-26 22:47:36 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "8986e477-e6c5-4d6c-83b2-2c90c134b599",
2024-02-26 22:47:36 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-03-13 23:24:38 +01:00
"source": [
"pd.set_option(\"display.max_columns\", None)\n",
"companies_databases\n"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "8fecc3bb-4c03-4144-97c5-615224d9729e",
"metadata": {},
"outputs": [],
"source": [
"pd.reset_option(\"display.max_columns\")"
]
},
{
"cell_type": "markdown",
"id": "0294ce71-840e-458b-8ffa-cadabbc6da21",
"metadata": {},
"source": [
"# Début travail 25/02"
]
},
{
"cell_type": "markdown",
"id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Bases communes aux types Musée"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbce1124-9a22-4502-a47a-fc3d0e2db70b",
"metadata": {},
"outputs": [],
"source": [
"companies['musee']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5080f66e-f779-410a-876d-b4fe2795e17e",
"metadata": {},
"outputs": [],
"source": [
"def list_company_bases(company):\n",
"    \"\"\"Return the table names found in the raw data folder of `company`.\"\"\"\n",
"    bucket = \"bdc2324-data/\" + company\n",
"    matches = (re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base) for base in fs.ls(bucket))\n",
"    # group(3) is the table name that follows the numeric prefix of the file\n",
"    return [match.group(3) for match in matches if match]\n",
"\n",
"\n",
"# Museums and music companies are both listed: the cells below compare each group\n",
"bases_by_company = {i: list_company_bases(i) for i in companies['musee'] + companies['musique']}\n",
"\n",
"# Kept for the cells that read the lists through their base_<company> names\n",
"for i, liste_base in bases_by_company.items():\n",
"    globals()['base_'+i] = liste_base\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abd477e1-7479-4c88-a5aa-f987af3f5b79",
"metadata": {},
"outputs": [],
"source": [
"# Tables shared by every museum company\n",
"# NOTE(review): this cell used to intersect with `base_101` as well, a list that no\n",
"# cell defines (company '101' is not in `companies`) - add it to companies['musee']\n",
"# if that company should be part of the comparison.\n",
"intersection = set.intersection(*(set(bases_by_company[i]) for i in companies['musee']))\n",
"\n",
"# Convert the result to a list\n",
"intersection_liste = list(intersection)\n",
"\n",
"print(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e",
"metadata": {},
"outputs": [],
"source": [
"# Trouver l'intersection entre les cinq listes\n",
"intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n",
"\n",
"# Convertir le résultat en liste si nécessaire\n",
"intersection_liste = list(intersection)\n",
"\n",
"print(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a",
"metadata": {},
"outputs": [],
"source": [
"len(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d058b21-a538-4f59-aefb-ef7966f73fdc",
"metadata": {},
"outputs": [],
"source": [
"df1_tags = load_dataset_2(\"1\", \"tags\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa441f99-733c-4675-8676-bed4682d3324",
"metadata": {},
"outputs": [],
"source": [
"df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6767a750-14a4-4c05-903e-d2f07170825b",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus = load_dataset_2(\"1\", \"customersplus\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "125e9145-a815-46fd-bdf4-07589508b259",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c17a6976-792f-474d-bcff-c89396eddb3f",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecfc155a-cb42-46ec-8da5-33fdcd087355",
"metadata": {},
"outputs": [],
"source": [
"len(df1_structure_tag_mappings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "071410b8-950d-4fcc-b2b9-57415253c286",
"metadata": {},
"outputs": [],
"source": [
"df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc",
"metadata": {},
"outputs": [],
"source": [
"# sort_values is a pandas DataFrame/Series method; numpy has no `np.sort_values` to look up."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3",
"metadata": {},
"outputs": [],
"source": [
"def info_colonnes_dataframe(df):\n",
"    \"\"\"\n",
"    Summarise the columns of `df`.\n",
"\n",
"    Returns a DataFrame with one row per column of `df`: its name\n",
"    (`Nom_colonne`), its dtype as text (`Type_colonne`) and its percentage of\n",
"    missing values (`Taux_NA`).\n",
"    \"\"\"\n",
"    # One record per column; isna().mean() is the share of missing values\n",
"    return pd.DataFrame([\n",
"        {\n",
"            'Nom_colonne': nom_colonne,\n",
"            'Type_colonne': str(serie.dtype),\n",
"            'Taux_NA': serie.isna().mean() * 100,\n",
"        }\n",
"        for nom_colonne, serie in df.items()\n",
"    ])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"info_colonnes_dataframe(df1_tags)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980",
"metadata": {},
"outputs": [],
"source": [
"info_colonnes_dataframe(df1_structure_tag_mappings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_colwidth', None)\n",
"\n",
"print(df1_tags['name'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a59bf932-5b54-4600-81f5-c55ac93ae510",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print(df1_tags['name'])"
]
},
{
"cell_type": "markdown",
"id": "76bffba1-5f7e-4308-9224-437ca66148f8",
"metadata": {},
"source": [
"## KPI sur target_type"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6daf22e-6583-4431-a467-660a1dd4e5a4",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 7,
2024-03-13 23:24:38 +01:00
"id": "d91d5895",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_colwidth', None)\n"
]
},
{
"cell_type": "markdown",
"id": "c58b17d3",
"metadata": {},
"source": [
"Raisonnement : on prend les target_type qui représentent 90% des clients et on fait des catégories dessus."
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 8,
2024-03-13 23:24:38 +01:00
"id": "6930bff5",
"metadata": {},
"outputs": [],
"source": [
"def print_main_target(tenant_id, nb_print = 40):\n",
"    \"\"\"\n",
"    Print the size of the `target_information` table of `tenant_id` and return\n",
"    its `nb_print` most frequent targets.\n",
"\n",
"    The returned DataFrame has one row per `target_name`, most frequent first:\n",
"    `customer_id` holds the number of rows of the target divided by the number\n",
"    of distinct customers, and `cumulative_customers` the running share of all\n",
"    rows of the table.\n",
"    \"\"\"\n",
"    targets = display_input_databases(tenant_id, \"target_information\")\n",
"    total_rows = len(targets)\n",
"    distinct_customers = targets['customer_id'].nunique()\n",
"\n",
"    print('Nombre de ciblage : ', total_rows)\n",
"    print('Nombre de client avec étiquette target : ', distinct_customers)\n",
"\n",
"    # Rows per target, most frequent first\n",
"    per_target = targets.groupby(\"target_name\")['customer_id'].count()\n",
"    per_target = per_target.reset_index().sort_values('customer_id', ascending=False)\n",
"    # The running share is computed on the raw counts, before they become ratios\n",
"    per_target['cumulative_customers'] = per_target['customer_id'].cumsum()/total_rows\n",
"    per_target['customer_id'] = per_target['customer_id']/distinct_customers\n",
"\n",
"    return per_target.head(nb_print)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
"id": "08488e43-56e0-461b-8770-c4e68d5c09f4",
2024-03-13 23:24:38 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1e7ee1a0",
"metadata": {
"scrolled": true
},
2024-03-13 23:24:38 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n",
"Nombre de ciblage : 768024\n",
"Nombre de client avec étiquette target : 151159\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>161</th>\n",
" <td>consentement optin mediation specialisee</td>\n",
" <td>0.992333</td>\n",
" <td>0.195306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>consentement optin jeune public</td>\n",
" <td>0.992194</td>\n",
" <td>0.390585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>consentement optin b2c</td>\n",
" <td>0.720493</td>\n",
" <td>0.532390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Arenametrix_bascule tel vers sib</td>\n",
" <td>0.232973</td>\n",
" <td>0.578242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>consentement optout b2c</td>\n",
" <td>0.228389</td>\n",
" <td>0.623193</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>COM Inscrits NL générale (historique)</td>\n",
" <td>0.152191</td>\n",
" <td>0.653146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>consentement optin newsletter generale</td>\n",
" <td>0.146171</td>\n",
" <td>0.681915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169</th>\n",
" <td>consentement optout newsletter generale</td>\n",
" <td>0.124736</td>\n",
" <td>0.706465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>170</th>\n",
" <td>consentement optout scolaires</td>\n",
" <td>0.104155</td>\n",
" <td>0.726964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>consentement optout dre</td>\n",
" <td>0.094788</td>\n",
" <td>0.745620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>164</th>\n",
" <td>consentement optout b2b</td>\n",
" <td>0.094067</td>\n",
" <td>0.764134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>Inscrits NL générale (export_291019 + operation_videomaton)</td>\n",
" <td>0.093187</td>\n",
" <td>0.782474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>157</th>\n",
" <td>consentement optin b2b</td>\n",
" <td>0.084249</td>\n",
" <td>0.799056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>216</th>\n",
" <td>ddcp_visiteurs dps 010622</td>\n",
" <td>0.081735</td>\n",
" <td>0.815142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Contacts_prenomsdoubles</td>\n",
" <td>0.077025</td>\n",
" <td>0.830302</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>FORMATION _ acheteurs optin last year</td>\n",
" <td>0.069364</td>\n",
" <td>0.843954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>214</th>\n",
" <td>ddcp_promo_visiteurs occasionnels_musee_8mois</td>\n",
" <td>0.043927</td>\n",
" <td>0.852600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>189</th>\n",
" <td>ddcp_promo_md_musée_dps 011019</td>\n",
" <td>0.039759</td>\n",
" <td>0.860425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>188</th>\n",
" <td>ddcp_promo_MD_billet_musée_oct_2019_agarder2</td>\n",
" <td>0.036266</td>\n",
" <td>0.867563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163</th>\n",
" <td>consentement optin scolaires</td>\n",
" <td>0.032079</td>\n",
" <td>0.873876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <td>consentement optin dre</td>\n",
" <td>0.029949</td>\n",
" <td>0.879771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>DDCP Newsletter enseignants</td>\n",
" <td>0.029836</td>\n",
" <td>0.885643</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>DDCP Newsletter jeune public</td>\n",
" <td>0.025549</td>\n",
" <td>0.890671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>Inscrits NL générale site web</td>\n",
" <td>0.024689</td>\n",
" <td>0.895531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>Votre première liste</td>\n",
" <td>0.024577</td>\n",
" <td>0.900368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>DDCP billets famille</td>\n",
" <td>0.023876</td>\n",
" <td>0.905067</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>DRE MucemLab</td>\n",
" <td>0.015229</td>\n",
" <td>0.908064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>DDCP Newsletter relais champ social</td>\n",
" <td>0.015017</td>\n",
" <td>0.911020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>DRE institutionnels</td>\n",
" <td>0.014746</td>\n",
" <td>0.913922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>DDCP PROMO Participants ateliers (adultes et enfants)</td>\n",
" <td>0.012927</td>\n",
" <td>0.916466</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>DDCP promo Plan B 2019 (concerts)</td>\n",
" <td>0.012887</td>\n",
" <td>0.919003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>DDCP promo MD pass musées dps oct 2018</td>\n",
" <td>0.011809</td>\n",
" <td>0.921327</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>DDCP rentrée culturelle 2023</td>\n",
" <td>0.011624</td>\n",
" <td>0.923614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>DDCP MD Procès du Siècle</td>\n",
" <td>0.011141</td>\n",
" <td>0.925807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>186</th>\n",
" <td>ddcp_md_scene_ouverte_au_talent</td>\n",
" <td>0.010433</td>\n",
" <td>0.927860</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108</th>\n",
" <td>DRE chercheurs</td>\n",
" <td>0.010300</td>\n",
" <td>0.929888</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220</th>\n",
" <td>festival_jean_rouch</td>\n",
" <td>0.009937</td>\n",
" <td>0.931843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>DRE Festival Jean Rouch</td>\n",
" <td>0.009937</td>\n",
" <td>0.933799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>275</th>\n",
" <td>structures_etiquette champ social</td>\n",
" <td>0.009844</td>\n",
" <td>0.935736</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)</td>\n",
" <td>0.008554</td>\n",
" <td>0.937420</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>Inscrits NL jeune public site web</td>\n",
" <td>0.008263</td>\n",
" <td>0.939046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>260</th>\n",
" <td>rappel po barvalo</td>\n",
" <td>0.008256</td>\n",
" <td>0.940671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104</th>\n",
" <td>DDCP_marseille_jazz_2023</td>\n",
" <td>0.006900</td>\n",
" <td>0.942029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>DDCP Newsletter centres de loisirs</td>\n",
" <td>0.006827</td>\n",
" <td>0.943373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Autres_interet_exposition</td>\n",
" <td>0.006754</td>\n",
" <td>0.944702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>228</th>\n",
" <td>import_arenametrix_contactstousecardouv_expo</td>\n",
" <td>0.006212</td>\n",
" <td>0.945925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>Formation clients fidèles</td>\n",
" <td>0.006047</td>\n",
" <td>0.947115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>DDCP Cine 2023</td>\n",
" <td>0.005656</td>\n",
" <td>0.948228</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>DDCP OLBJ! 2023</td>\n",
" <td>0.005464</td>\n",
" <td>0.949304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>journee-de-l-inclusion_20230601_21h25</td>\n",
" <td>0.005326</td>\n",
" <td>0.950352</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>Questionnaire 2 satisfaction scolaire</td>\n",
" <td>0.005259</td>\n",
" <td>0.951387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93</th>\n",
" <td>DDCP rendez-vous de septembre offre spéciale</td>\n",
" <td>0.005253</td>\n",
" <td>0.952421</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>Plan B 2018 (électro)</td>\n",
" <td>0.005081</td>\n",
" <td>0.953421</td>\n",
" </tr>\n",
" <tr>\n",
" <th>270</th>\n",
" <td>save_the_date_populaire</td>\n",
" <td>0.004948</td>\n",
" <td>0.954395</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>Newsletter CCR (passerelle)</td>\n",
" <td>0.004783</td>\n",
" <td>0.955336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>Fichier institutionnel (ne pas utiliser sans autorisation)</td>\n",
" <td>0.004538</td>\n",
" <td>0.956229</td>\n",
" </tr>\n",
" <tr>\n",
" <th>222</th>\n",
" <td>fichier institutionnel_ne_pas_toucher</td>\n",
" <td>0.004532</td>\n",
" <td>0.957121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>266</th>\n",
" <td>reservations_payees_pass_culture_190422_au_310123</td>\n",
" <td>0.004492</td>\n",
" <td>0.958005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>DDCP spectateurs Marseille Jazz 18-19-21</td>\n",
" <td>0.004432</td>\n",
" <td>0.958878</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>acid arab</td>\n",
" <td>0.004413</td>\n",
" <td>0.959746</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_name \\\n",
"161 consentement optin mediation specialisee \n",
"160 consentement optin jeune public \n",
"158 consentement optin b2c \n",
"5 Arenametrix_bascule tel vers sib \n",
"165 consentement optout b2c \n",
"19 COM Inscrits NL générale (historique) \n",
"162 consentement optin newsletter generale \n",
"169 consentement optout newsletter generale \n",
"170 consentement optout scolaires \n",
"166 consentement optout dre \n",
"164 consentement optout b2b \n",
"126 Inscrits NL générale (export_291019 + operation_videomaton) \n",
"157 consentement optin b2b \n",
"216 ddcp_visiteurs dps 010622 \n",
"20 Contacts_prenomsdoubles \n",
"115 FORMATION _ acheteurs optin last year \n",
"214 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
"189 ddcp_promo_md_musée_dps 011019 \n",
"188 ddcp_promo_MD_billet_musée_oct_2019_agarder2 \n",
"163 consentement optin scolaires \n",
"159 consentement optin dre \n",
"34 DDCP Newsletter enseignants \n",
"36 DDCP Newsletter jeune public \n",
"127 Inscrits NL générale site web \n",
"145 Votre première liste \n",
"61 DDCP billets famille \n",
"106 DRE MucemLab \n",
"39 DDCP Newsletter relais champ social \n",
"110 DRE institutionnels \n",
"48 DDCP PROMO Participants ateliers (adultes et enfants) \n",
"74 DDCP promo Plan B 2019 (concerts) \n",
"72 DDCP promo MD pass musées dps oct 2018 \n",
"94 DDCP rentrée culturelle 2023 \n",
"23 DDCP MD Procès du Siècle \n",
"186 ddcp_md_scene_ouverte_au_talent \n",
"108 DRE chercheurs \n",
"220 festival_jean_rouch \n",
"105 DRE Festival Jean Rouch \n",
"275 structures_etiquette champ social \n",
"86 DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers) \n",
"128 Inscrits NL jeune public site web \n",
"260 rappel po barvalo \n",
"104 DDCP_marseille_jazz_2023 \n",
"32 DDCP Newsletter centres de loisirs \n",
"13 Autres_interet_exposition \n",
"228 import_arenametrix_contactstousecardouv_expo \n",
"117 Formation clients fidèles \n",
"22 DDCP Cine 2023 \n",
"40 DDCP OLBJ! 2023 \n",
"240 journee-de-l-inclusion_20230601_21h25 \n",
"137 Questionnaire 2 satisfaction scolaire \n",
"93 DDCP rendez-vous de septembre offre spéciale \n",
"135 Plan B 2018 (électro) \n",
"270 save_the_date_populaire \n",
"132 Newsletter CCR (passerelle) \n",
"116 Fichier institutionnel (ne pas utiliser sans autorisation) \n",
"222 fichier institutionnel_ne_pas_toucher \n",
"266 reservations_payees_pass_culture_190422_au_310123 \n",
"102 DDCP spectateurs Marseille Jazz 18-19-21 \n",
"147 acid arab \n",
"\n",
" customer_id cumulative_customers \n",
"161 0.992333 0.195306 \n",
"160 0.992194 0.390585 \n",
"158 0.720493 0.532390 \n",
"5 0.232973 0.578242 \n",
"165 0.228389 0.623193 \n",
"19 0.152191 0.653146 \n",
"162 0.146171 0.681915 \n",
"169 0.124736 0.706465 \n",
"170 0.104155 0.726964 \n",
"166 0.094788 0.745620 \n",
"164 0.094067 0.764134 \n",
"126 0.093187 0.782474 \n",
"157 0.084249 0.799056 \n",
"216 0.081735 0.815142 \n",
"20 0.077025 0.830302 \n",
"115 0.069364 0.843954 \n",
"214 0.043927 0.852600 \n",
"189 0.039759 0.860425 \n",
"188 0.036266 0.867563 \n",
"163 0.032079 0.873876 \n",
"159 0.029949 0.879771 \n",
"34 0.029836 0.885643 \n",
"36 0.025549 0.890671 \n",
"127 0.024689 0.895531 \n",
"145 0.024577 0.900368 \n",
"61 0.023876 0.905067 \n",
"106 0.015229 0.908064 \n",
"39 0.015017 0.911020 \n",
"110 0.014746 0.913922 \n",
"48 0.012927 0.916466 \n",
"74 0.012887 0.919003 \n",
"72 0.011809 0.921327 \n",
"94 0.011624 0.923614 \n",
"23 0.011141 0.925807 \n",
"186 0.010433 0.927860 \n",
"108 0.010300 0.929888 \n",
"220 0.009937 0.931843 \n",
"105 0.009937 0.933799 \n",
"275 0.009844 0.935736 \n",
"86 0.008554 0.937420 \n",
"128 0.008263 0.939046 \n",
"260 0.008256 0.940671 \n",
"104 0.006900 0.942029 \n",
"32 0.006827 0.943373 \n",
"13 0.006754 0.944702 \n",
"228 0.006212 0.945925 \n",
"117 0.006047 0.947115 \n",
"22 0.005656 0.948228 \n",
"40 0.005464 0.949304 \n",
"240 0.005326 0.950352 \n",
"137 0.005259 0.951387 \n",
"93 0.005253 0.952421 \n",
"135 0.005081 0.953421 \n",
"270 0.004948 0.954395 \n",
"132 0.004783 0.955336 \n",
"116 0.004538 0.956229 \n",
"222 0.004532 0.957121 \n",
"266 0.004492 0.958005 \n",
"102 0.004432 0.958878 \n",
"147 0.004413 0.959746 "
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 9,
2024-03-13 23:24:38 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.set_option(\"max_colwidth\", None)\n",
"print_main_target('1', 60)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 11,
"id": "c66a4dc1",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19f3a2dd-ba3d-4dec-8e10-fed544ab6a53",
"metadata": {},
"outputs": [],
"source": [
"pd.reset_option('display.max_rows')"
]
},
{
"cell_type": "code",
"execution_count": null,
2024-03-13 23:24:38 +01:00
"id": "b57a28ac",
2024-03-23 10:48:47 +01:00
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_main_target('2', 25)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a65991f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_main_target('3', 70)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f34b8bf",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_main_target('4', 100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40fe3676",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)\n",
"\n",
"print_main_target('5', 100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "820d3600-379b-4245-a977-f1f1fa1f1839",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_main_target('6', 100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86f64a1b-763a-4e43-9601-a38c80392d47",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print_main_target('7', 100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fbf2ea42-515a-4cdf-a4c1-50f99c379ed9",
2024-03-13 23:24:38 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
"source": [
"print_main_target('8', 100)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9684045c-4e25-4952-b099-a559baa5d749",
"metadata": {
"scrolled": true
},
2024-03-13 23:24:38 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-23 10:48:47 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_9/target_information.csv\n",
"Nombre de ciblage : 1399179\n",
"Nombre de client avec étiquette target : 181136\n"
2024-03-13 23:24:38 +01:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
2024-03-23 10:48:47 +01:00
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>184</th>\n",
" <td>Run Mate</td>\n",
" <td>0.999939</td>\n",
" <td>0.129451</td>\n",
" </tr>\n",
" <tr>\n",
" <th>233</th>\n",
" <td>Triathlon</td>\n",
" <td>0.999934</td>\n",
" <td>0.258901</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>HGM</td>\n",
" <td>0.999934</td>\n",
" <td>0.388351</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>20km Genève</td>\n",
" <td>0.999917</td>\n",
" <td>0.517799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>GM23</td>\n",
" <td>0.996561</td>\n",
" <td>0.646813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>GGM - inscrits 2015 - 2023 - FR</td>\n",
" <td>0.261439</td>\n",
" <td>0.680658</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>HGM - Inscrits 2005-2019 - FR</td>\n",
" <td>0.243110</td>\n",
" <td>0.712131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>187</th>\n",
" <td>RunMate - 500km autour montreux - FR</td>\n",
" <td>0.236612</td>\n",
" <td>0.742763</td>\n",
" </tr>\n",
" <tr>\n",
" <th>188</th>\n",
" <td>RunMate - 500km autour montreux - FR v2</td>\n",
" <td>0.204162</td>\n",
" <td>0.769193</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>BDD - Semi &amp; Marathon_FR</td>\n",
" <td>0.169536</td>\n",
" <td>0.791141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>HGM - GM Public - FR</td>\n",
" <td>0.154420</td>\n",
" <td>0.811132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>HGM - Inscrits 2005-2019 - ENG</td>\n",
" <td>0.113401</td>\n",
" <td>0.825813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>GGM - inscrits 2015 - 2023 - ENG</td>\n",
" <td>0.107405</td>\n",
" <td>0.839717</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>GGM23 - inscrits - FR</td>\n",
" <td>0.066734</td>\n",
" <td>0.848357</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>BDD - Semi &amp; Marathon_ENG</td>\n",
" <td>0.063240</td>\n",
" <td>0.856544</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>HGM - GM Public - ENG</td>\n",
" <td>0.061893</td>\n",
" <td>0.864556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>LTGT - Inscrits 2008-2019 - FR</td>\n",
" <td>0.050371</td>\n",
" <td>0.871077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>186</th>\n",
" <td>RunMate - 500km autour montreux - ENG</td>\n",
" <td>0.042316</td>\n",
" <td>0.876555</td>\n",
" </tr>\n",
" <tr>\n",
" <th>210</th>\n",
" <td>TMB - Opt-in</td>\n",
" <td>0.039738</td>\n",
" <td>0.881700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>LTGT23-Amateur-FR</td>\n",
" <td>0.036067</td>\n",
" <td>0.886369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>HGM désinscriptions</td>\n",
" <td>0.031893</td>\n",
" <td>0.890498</td>\n",
" </tr>\n",
" <tr>\n",
" <th>213</th>\n",
" <td>TMB - anciens participants 2015 - 2020 - FR+ENG</td>\n",
" <td>0.028145</td>\n",
" <td>0.894141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>20km - Inscrits 2017-2019 - FR</td>\n",
" <td>0.027267</td>\n",
" <td>0.897671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>LTGT - inscrits 2015 - 2020 - FR</td>\n",
" <td>0.026185</td>\n",
" <td>0.901061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>20KM23_Inscrit NL 25.10_FR</td>\n",
" <td>0.023049</td>\n",
" <td>0.904045</td>\n",
" </tr>\n",
2024-03-13 23:24:38 +01:00
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>185</th>\n",
" <td>Run Mate désinscriptions</td>\n",
" <td>0.021746</td>\n",
" <td>0.906860</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>303</th>\n",
" <td>test listes</td>\n",
" <td>0.021321</td>\n",
" <td>0.909621</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>72</th>\n",
" <td>GGM23 - inscrits - ENG</td>\n",
" <td>0.021304</td>\n",
" <td>0.912379</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>9</th>\n",
" <td>20KM23_FINISHER_FR2</td>\n",
" <td>0.020316</td>\n",
" <td>0.915009</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-23 10:48:47 +01:00
" <td>20KM23_FINISHER_FR</td>\n",
" <td>0.020316</td>\n",
" <td>0.917639</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>38</th>\n",
" <td>20km23_nl finisher fr</td>\n",
" <td>0.020316</td>\n",
" <td>0.920269</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>107</th>\n",
" <td>LTGT21 - Inscrits FR+ENG</td>\n",
" <td>0.019058</td>\n",
" <td>0.922736</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>33</th>\n",
" <td>20km23_inscrits fr 05.10_test</td>\n",
" <td>0.017445</td>\n",
" <td>0.924995</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>25</th>\n",
" <td>20km Genève désinscriptions</td>\n",
" <td>0.017390</td>\n",
" <td>0.927246</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>113</th>\n",
" <td>LTGT22 - inscris FR+ENG</td>\n",
" <td>0.016601</td>\n",
" <td>0.929395</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>47</th>\n",
" <td>Bénévoles Mass Events</td>\n",
" <td>0.016501</td>\n",
" <td>0.931531</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>32</th>\n",
" <td>20km23_inscrits fr 02.10</td>\n",
" <td>0.016479</td>\n",
" <td>0.933665</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>64</th>\n",
" <td>GGM - Contact PT OK</td>\n",
" <td>0.016065</td>\n",
" <td>0.935744</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>112</th>\n",
" <td>LTGT22 - Anciens participants Half, Half relais, standard, standard relais - FR</td>\n",
" <td>0.015425</td>\n",
" <td>0.937741</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>225</th>\n",
" <td>TMB23-Amateur-FR</td>\n",
" <td>0.013349</td>\n",
" <td>0.939470</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>110</th>\n",
" <td>LTGT21 - inscrits FR</td>\n",
" <td>0.012642</td>\n",
" <td>0.941106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>219</th>\n",
" <td>TMB22 - inscrits FR</td>\n",
" <td>0.010732</td>\n",
" <td>0.942496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>LTGT - Inscrits 2008-2019 - ENG</td>\n",
" <td>0.010688</td>\n",
" <td>0.943879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>LTGT22 - inscrits FR</td>\n",
" <td>0.010368</td>\n",
" <td>0.945221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>203</th>\n",
" <td>TIMB22 - incritS FR+ENG</td>\n",
" <td>0.010064</td>\n",
" <td>0.946524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>20km22 - inscrits - FR</td>\n",
" <td>0.010009</td>\n",
" <td>0.947820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>20km20 - Inscrits finaux - FR</td>\n",
" <td>0.009518</td>\n",
" <td>0.949052</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20KM21 - inscrits FR</td>\n",
" <td>0.009507</td>\n",
" <td>0.950283</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-23 10:48:47 +01:00
" <td>20KM - Contact PT OKb</td>\n",
" <td>0.009451</td>\n",
" <td>0.951507</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>175</th>\n",
" <td>RM21- Inscrits FR+ENG</td>\n",
" <td>0.009098</td>\n",
" <td>0.952684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>222</th>\n",
" <td>TMB23 - inscrits FR + ENG</td>\n",
" <td>0.008518</td>\n",
" <td>0.953787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>TMB23 - inscrits FR + ENG -</td>\n",
" <td>0.008518</td>\n",
" <td>0.954890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>304</th>\n",
" <td>tmb - pro - fr - VF</td>\n",
" <td>0.008298</td>\n",
" <td>0.955964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>221</th>\n",
" <td>TMB23 - inscrits FR</td>\n",
" <td>0.007834</td>\n",
" <td>0.956978</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>LTGT20 - Inscrits - FR</td>\n",
" <td>0.007746</td>\n",
" <td>0.957981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>234</th>\n",
" <td>Triathlon désinscriptions</td>\n",
" <td>0.007740</td>\n",
" <td>0.958983</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>ggm23 - inscrits fr + eng</td>\n",
" <td>0.007276</td>\n",
" <td>0.959925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>LTGT23-Amateur-ENG- vf</td>\n",
" <td>0.007166</td>\n",
" <td>0.960853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>LTGT23-Amateur-ENG</td>\n",
" <td>0.007166</td>\n",
" <td>0.961780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>173</th>\n",
" <td>RM21 - inscrits FR (reportés + new)</td>\n",
" <td>0.007160</td>\n",
" <td>0.962707</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>TE23_Inscrits_FR</td>\n",
" <td>0.007116</td>\n",
" <td>0.963629</td>\n",
" </tr>\n",
" <tr>\n",
" <th>153</th>\n",
" <td>RM - Contact PT OK</td>\n",
" <td>0.006967</td>\n",
" <td>0.964531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>RM22 - inscrits FR</td>\n",
" <td>0.006879</td>\n",
" <td>0.965421</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>20km23_inscrits nl 1 fr - 17.08</td>\n",
" <td>0.006879</td>\n",
" <td>0.966312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>20KM23_Inscrits NL 1 FR - 17.08</td>\n",
" <td>0.006879</td>\n",
" <td>0.967202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>LTGT - Inscrits NL - FR</td>\n",
" <td>0.006669</td>\n",
" <td>0.968066</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>RM20 - inscrits 2020 (Total avec reports) - FR</td>\n",
" <td>0.006592</td>\n",
" <td>0.968919</td>\n",
" </tr>\n",
" <tr>\n",
" <th>297</th>\n",
" <td>rm23_nl finisher fr</td>\n",
" <td>0.006117</td>\n",
" <td>0.969711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>180</th>\n",
" <td>RM23_NL Finisher FR</td>\n",
" <td>0.006117</td>\n",
" <td>0.970503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>RM19 - Inscrits - FR</td>\n",
" <td>0.006106</td>\n",
" <td>0.971293</td>\n",
" </tr>\n",
" <tr>\n",
" <th>292</th>\n",
" <td>rm23_inscrits nl 19.09 fr</td>\n",
" <td>0.006089</td>\n",
" <td>0.972081</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>GGM - Inscrits NL - FR</td>\n",
" <td>0.006023</td>\n",
" <td>0.972861</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93</th>\n",
" <td>LTGT - Contact PT OK</td>\n",
" <td>0.005973</td>\n",
" <td>0.973635</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174</th>\n",
" <td>RM21 - new inscrits - FR</td>\n",
" <td>0.005808</td>\n",
" <td>0.974386</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>20km - Inscrits 2017-2019 - ENG</td>\n",
" <td>0.005543</td>\n",
" <td>0.975104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>20km23_no show fr</td>\n",
" <td>0.005443</td>\n",
" <td>0.975809</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>20KM23_NO SHOW_FR</td>\n",
" <td>0.005443</td>\n",
" <td>0.976513</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101</th>\n",
" <td>LTGT - inscrits 2015-2020 - ENG</td>\n",
" <td>0.005405</td>\n",
" <td>0.977213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>20km - Inscrits NL - FR</td>\n",
" <td>0.005377</td>\n",
" <td>0.977909</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109</th>\n",
" <td>LTGT21 - Reportés - FR</td>\n",
" <td>0.005366</td>\n",
" <td>0.978604</td>\n",
" </tr>\n",
" <tr>\n",
" <th>214</th>\n",
" <td>TMB - anciens participants Standard et HALF</td>\n",
" <td>0.004924</td>\n",
" <td>0.979241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>242</th>\n",
" <td>ltgt - pro - fr vf</td>\n",
" <td>0.004798</td>\n",
" <td>0.979862</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>20km23_nl 3 26.10 eng</td>\n",
" <td>0.004720</td>\n",
" <td>0.980474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>20km22 - inscrits - ANG</td>\n",
" <td>0.004599</td>\n",
" <td>0.981069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>GGM24_INSCRIS FR&amp;ENG</td>\n",
" <td>0.004245</td>\n",
" <td>0.981619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>239</th>\n",
" <td>ggm24_inscrits 05.10</td>\n",
" <td>0.004245</td>\n",
" <td>0.982168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>Entreprises - FR</td>\n",
" <td>0.004091</td>\n",
" <td>0.982698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>290</th>\n",
" <td>rm23_inscrits fr nl2 21.07</td>\n",
" <td>0.004003</td>\n",
" <td>0.983216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>208</th>\n",
" <td>TMB - Contact PT OK</td>\n",
" <td>0.003771</td>\n",
" <td>0.983704</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>20km23_inscrits eng 02.10</td>\n",
" <td>0.003699</td>\n",
" <td>0.984183</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>20km23_nl finisher eng</td>\n",
" <td>0.003588</td>\n",
" <td>0.984647</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>20KM23_FINISHER_ENG</td>\n",
" <td>0.003588</td>\n",
" <td>0.985112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>LTGT22 - Anciens participants Half, Half relais, standard, standard relais - EN</td>\n",
" <td>0.003555</td>\n",
" <td>0.985572</td>\n",
" </tr>\n",
" <tr>\n",
" <th>207</th>\n",
" <td>TIMB23 - liste d'attente - FR</td>\n",
" <td>0.003307</td>\n",
" <td>0.986000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>205</th>\n",
" <td>TIMB22 - reports FR+EN - VF</td>\n",
" <td>0.003058</td>\n",
" <td>0.986396</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>RM20 - Inscrits sur liste d'attente - FR</td>\n",
" <td>0.003058</td>\n",
" <td>0.986792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>LTGT21 - Inscrits ENG</td>\n",
" <td>0.003031</td>\n",
" <td>0.987185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>216</th>\n",
" <td>TMB21 - Liste d'attente - FR</td>\n",
" <td>0.002992</td>\n",
" <td>0.987572</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <td>TIMB21 - liste d'attente - FR</td>\n",
" <td>0.002992</td>\n",
" <td>0.987959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>245</th>\n",
" <td>ltgt23 - non half eng</td>\n",
" <td>0.002871</td>\n",
" <td>0.988331</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-23 10:48:47 +01:00
" target_name \\\n",
"184 Run Mate \n",
"233 Triathlon \n",
"77 HGM \n",
"24 20km Genève \n",
"75 GM23 \n",
"68 GGM - inscrits 2015 - 2023 - FR \n",
"85 HGM - Inscrits 2005-2019 - FR \n",
"187 RunMate - 500km autour montreux - FR \n",
"188 RunMate - 500km autour montreux - FR v2 \n",
"46 BDD - Semi & Marathon_FR \n",
"83 HGM - GM Public - FR \n",
"84 HGM - Inscrits 2005-2019 - ENG \n",
"67 GGM - inscrits 2015 - 2023 - ENG \n",
"73 GGM23 - inscrits - FR \n",
"45 BDD - Semi & Marathon_ENG \n",
"82 HGM - GM Public - ENG \n",
"95 LTGT - Inscrits 2008-2019 - FR \n",
"186 RunMate - 500km autour montreux - ENG \n",
"210 TMB - Opt-in \n",
"118 LTGT23-Amateur-FR \n",
"88 HGM désinscriptions \n",
"213 TMB - anciens participants 2015 - 2020 - FR+ENG \n",
"19 20km - Inscrits 2017-2019 - FR \n",
"100 LTGT - inscrits 2015 - 2020 - FR \n",
"10 20KM23_Inscrit NL 25.10_FR \n",
"185 Run Mate désinscriptions \n",
"303 test listes \n",
"72 GGM23 - inscrits - ENG \n",
"9 20KM23_FINISHER_FR2 \n",
"8 20KM23_FINISHER_FR \n",
"38 20km23_nl finisher fr \n",
"107 LTGT21 - Inscrits FR+ENG \n",
"33 20km23_inscrits fr 05.10_test \n",
"25 20km Genève désinscriptions \n",
"113 LTGT22 - inscris FR+ENG \n",
"47 Bénévoles Mass Events \n",
"32 20km23_inscrits fr 02.10 \n",
"64 GGM - Contact PT OK \n",
"112 LTGT22 - Anciens participants Half, Half relais, standard, standard relais - FR \n",
"225 TMB23-Amateur-FR \n",
"110 LTGT21 - inscrits FR \n",
"219 TMB22 - inscrits FR \n",
"94 LTGT - Inscrits 2008-2019 - ENG \n",
"115 LTGT22 - inscrits FR \n",
"203 TIMB22 - incritS FR+ENG \n",
"29 20km22 - inscrits - FR \n",
"27 20km20 - Inscrits finaux - FR \n",
"4 20KM21 - inscrits FR \n",
"2 20KM - Contact PT OKb \n",
"175 RM21- Inscrits FR+ENG \n",
"222 TMB23 - inscrits FR + ENG \n",
"223 TMB23 - inscrits FR + ENG - \n",
"304 tmb - pro - fr - VF \n",
"221 TMB23 - inscrits FR \n",
"105 LTGT20 - Inscrits - FR \n",
"234 Triathlon désinscriptions \n",
"238 ggm23 - inscrits fr + eng \n",
"117 LTGT23-Amateur-ENG- vf \n",
"116 LTGT23-Amateur-ENG \n",
"173 RM21 - inscrits FR (reportés + new) \n",
"197 TE23_Inscrits_FR \n",
"153 RM - Contact PT OK \n",
"177 RM22 - inscrits FR \n",
"35 20km23_inscrits nl 1 fr - 17.08 \n",
"12 20KM23_Inscrits NL 1 FR - 17.08 \n",
"97 LTGT - Inscrits NL - FR \n",
"166 RM20 - inscrits 2020 (Total avec reports) - FR \n",
"297 rm23_nl finisher fr \n",
"180 RM23_NL Finisher FR \n",
"162 RM19 - Inscrits - FR \n",
"292 rm23_inscrits nl 19.09 fr \n",
"66 GGM - Inscrits NL - FR \n",
"93 LTGT - Contact PT OK \n",
"174 RM21 - new inscrits - FR \n",
"18 20km - Inscrits 2017-2019 - ENG \n",
"42 20km23_no show fr \n",
"16 20KM23_NO SHOW_FR \n",
"101 LTGT - inscrits 2015-2020 - ENG \n",
"21 20km - Inscrits NL - FR \n",
"109 LTGT21 - Reportés - FR \n",
"214 TMB - anciens participants Standard et HALF \n",
"242 ltgt - pro - fr vf \n",
"36 20km23_nl 3 26.10 eng \n",
"28 20km22 - inscrits - ANG \n",
"74 GGM24_INSCRIS FR&ENG \n",
"239 ggm24_inscrits 05.10 \n",
"62 Entreprises - FR \n",
"290 rm23_inscrits fr nl2 21.07 \n",
"208 TMB - Contact PT OK \n",
"31 20km23_inscrits eng 02.10 \n",
"37 20km23_nl finisher eng \n",
"7 20KM23_FINISHER_ENG \n",
"111 LTGT22 - Anciens participants Half, Half relais, standard, standard relais - EN \n",
"207 TIMB23 - liste d'attente - FR \n",
"205 TIMB22 - reports FR+EN - VF \n",
"165 RM20 - Inscrits sur liste d'attente - FR \n",
"106 LTGT21 - Inscrits ENG \n",
"216 TMB21 - Liste d'attente - FR \n",
"202 TIMB21 - liste d'attente - FR \n",
"245 ltgt23 - non half eng \n",
2024-03-13 23:24:38 +01:00
"\n",
2024-03-23 10:48:47 +01:00
" customer_id cumulative_customers \n",
"184 0.999939 0.129451 \n",
"233 0.999934 0.258901 \n",
"77 0.999934 0.388351 \n",
"24 0.999917 0.517799 \n",
"75 0.996561 0.646813 \n",
"68 0.261439 0.680658 \n",
"85 0.243110 0.712131 \n",
"187 0.236612 0.742763 \n",
"188 0.204162 0.769193 \n",
"46 0.169536 0.791141 \n",
"83 0.154420 0.811132 \n",
"84 0.113401 0.825813 \n",
"67 0.107405 0.839717 \n",
"73 0.066734 0.848357 \n",
"45 0.063240 0.856544 \n",
"82 0.061893 0.864556 \n",
"95 0.050371 0.871077 \n",
"186 0.042316 0.876555 \n",
"210 0.039738 0.881700 \n",
"118 0.036067 0.886369 \n",
"88 0.031893 0.890498 \n",
"213 0.028145 0.894141 \n",
"19 0.027267 0.897671 \n",
"100 0.026185 0.901061 \n",
"10 0.023049 0.904045 \n",
"185 0.021746 0.906860 \n",
"303 0.021321 0.909621 \n",
"72 0.021304 0.912379 \n",
"9 0.020316 0.915009 \n",
"8 0.020316 0.917639 \n",
"38 0.020316 0.920269 \n",
"107 0.019058 0.922736 \n",
"33 0.017445 0.924995 \n",
"25 0.017390 0.927246 \n",
"113 0.016601 0.929395 \n",
"47 0.016501 0.931531 \n",
"32 0.016479 0.933665 \n",
"64 0.016065 0.935744 \n",
"112 0.015425 0.937741 \n",
"225 0.013349 0.939470 \n",
"110 0.012642 0.941106 \n",
"219 0.010732 0.942496 \n",
"94 0.010688 0.943879 \n",
"115 0.010368 0.945221 \n",
"203 0.010064 0.946524 \n",
"29 0.010009 0.947820 \n",
"27 0.009518 0.949052 \n",
"4 0.009507 0.950283 \n",
"2 0.009451 0.951507 \n",
"175 0.009098 0.952684 \n",
"222 0.008518 0.953787 \n",
"223 0.008518 0.954890 \n",
"304 0.008298 0.955964 \n",
"221 0.007834 0.956978 \n",
"105 0.007746 0.957981 \n",
"234 0.007740 0.958983 \n",
"238 0.007276 0.959925 \n",
"117 0.007166 0.960853 \n",
"116 0.007166 0.961780 \n",
"173 0.007160 0.962707 \n",
"197 0.007116 0.963629 \n",
"153 0.006967 0.964531 \n",
"177 0.006879 0.965421 \n",
"35 0.006879 0.966312 \n",
"12 0.006879 0.967202 \n",
"97 0.006669 0.968066 \n",
"166 0.006592 0.968919 \n",
"297 0.006117 0.969711 \n",
"180 0.006117 0.970503 \n",
"162 0.006106 0.971293 \n",
"292 0.006089 0.972081 \n",
"66 0.006023 0.972861 \n",
"93 0.005973 0.973635 \n",
"174 0.005808 0.974386 \n",
"18 0.005543 0.975104 \n",
"42 0.005443 0.975809 \n",
"16 0.005443 0.976513 \n",
"101 0.005405 0.977213 \n",
"21 0.005377 0.977909 \n",
"109 0.005366 0.978604 \n",
"214 0.004924 0.979241 \n",
"242 0.004798 0.979862 \n",
"36 0.004720 0.980474 \n",
"28 0.004599 0.981069 \n",
"74 0.004245 0.981619 \n",
"239 0.004245 0.982168 \n",
"62 0.004091 0.982698 \n",
"290 0.004003 0.983216 \n",
"208 0.003771 0.983704 \n",
"31 0.003699 0.984183 \n",
"37 0.003588 0.984647 \n",
"7 0.003588 0.985112 \n",
"111 0.003555 0.985572 \n",
"207 0.003307 0.986000 \n",
"205 0.003058 0.986396 \n",
"165 0.003058 0.986792 \n",
"106 0.003031 0.987185 \n",
"216 0.002992 0.987572 \n",
"202 0.002992 0.987959 \n",
"245 0.002871 0.988331 "
2024-03-13 23:24:38 +01:00
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 12,
2024-03-13 23:24:38 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-23 10:48:47 +01:00
"print_main_target('9', 100)"
2024-03-13 23:24:38 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 13,
"id": "cf8f7816-e7f3-4b7a-a987-8350a76eb140",
"metadata": {
"scrolled": true
},
2024-03-13 23:24:38 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-23 10:48:47 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n",
"Nombre de ciblage : 69258\n",
"Nombre de client avec étiquette target : 53639\n"
2024-03-13 23:24:38 +01:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>9</th>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>0.722068</td>\n",
" <td>0.559228</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>2</th>\n",
" <td>Blackliste</td>\n",
" <td>0.188669</td>\n",
" <td>0.705348</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>36</th>\n",
" <td>import opt-in février 2023</td>\n",
" <td>0.068141</td>\n",
" <td>0.758122</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>37</th>\n",
" <td>import opt-out fév 23</td>\n",
" <td>0.055016</td>\n",
" <td>0.800731</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>5</th>\n",
" <td>Contacts opt-out 20.10.22</td>\n",
" <td>0.047726</td>\n",
" <td>0.837694</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Théâtre amateur</td>\n",
" <td>0.033054</td>\n",
" <td>0.863294</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>41</th>\n",
" <td>liste théâtre amateur</td>\n",
" <td>0.026641</td>\n",
" <td>0.883927</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>4</th>\n",
" <td>CP EUH 7 octobre 2022</td>\n",
" <td>0.026492</td>\n",
" <td>0.904444</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>47</th>\n",
" <td>spec Falaise - relance Mazut</td>\n",
" <td>0.020452</td>\n",
" <td>0.920284</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>35</th>\n",
" <td>fichierspectateurs_recreatrales</td>\n",
" <td>0.016052</td>\n",
" <td>0.932715</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>29</th>\n",
" <td>fichier news quartier grand t</td>\n",
" <td>0.007644</td>\n",
" <td>0.938635</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>33</th>\n",
" <td>fichier2 news quartier grand t</td>\n",
" <td>0.007383</td>\n",
" <td>0.944353</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>23</th>\n",
" <td>contacts amateurs 22-23 ok ok</td>\n",
" <td>0.006842</td>\n",
" <td>0.949652</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>22</th>\n",
" <td>amateurs incandescences oct 22</td>\n",
" <td>0.006730</td>\n",
" <td>0.954864</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>25</th>\n",
" <td>fichier invit Soir de fête</td>\n",
" <td>0.005984</td>\n",
" <td>0.959499</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>19</th>\n",
" <td>Spectateurs Ce qu'il faut dire</td>\n",
" <td>0.004959</td>\n",
" <td>0.963340</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>49</th>\n",
" <td>spec Mazut - tout</td>\n",
" <td>0.003300</td>\n",
" <td>0.965896</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>26</th>\n",
" <td>fichier invitation déjeuner chantier le 8 juin</td>\n",
" <td>0.003151</td>\n",
" <td>0.968336</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>0</th>\n",
" <td>3 M - spec du dimanche (toutes rep)</td>\n",
" <td>0.003132</td>\n",
" <td>0.970762</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>30</th>\n",
" <td>fichier op les dodos 21 nov global</td>\n",
" <td>0.003020</td>\n",
" <td>0.973101</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>8</th>\n",
" <td>Invitation protocolaire Les Fauves</td>\n",
" <td>0.002573</td>\n",
" <td>0.975093</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>1</th>\n",
" <td>3 M - spec du samedi (toutes rep)</td>\n",
" <td>0.002554</td>\n",
" <td>0.977071</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>28</th>\n",
" <td>fichier jumelage halveque</td>\n",
" <td>0.002498</td>\n",
" <td>0.979006</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>27</th>\n",
" <td>fichier invitations pro les enfants d'amazi</td>\n",
" <td>0.002256</td>\n",
" <td>0.980753</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>18</th>\n",
" <td>Spectateurs Acte(s) et sueurs</td>\n",
" <td>0.002237</td>\n",
" <td>0.982486</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>13</th>\n",
" <td>Presse 2021</td>\n",
" <td>0.002200</td>\n",
" <td>0.984190</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>20</th>\n",
" <td>Spectateurs-rices SpaceSongs</td>\n",
" <td>0.002069</td>\n",
" <td>0.985792</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Presse Tous Terriens</td>\n",
" <td>0.002069</td>\n",
" <td>0.987395</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>46</th>\n",
" <td>nouveaux acheteurs 23-24 - relance Mazut</td>\n",
" <td>0.002032</td>\n",
" <td>0.988969</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>16</th>\n",
" <td>Presse communqiué agglo</td>\n",
" <td>0.001920</td>\n",
" <td>0.990456</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>14</th>\n",
" <td>Presse 22-23</td>\n",
" <td>0.001883</td>\n",
" <td>0.991914</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>31</th>\n",
" <td>fichier presse grand t 23-24export</td>\n",
" <td>0.001864</td>\n",
" <td>0.993358</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>3</th>\n",
" <td>Bottière - jumelage (contacts proches projet</td>\n",
" <td>0.001566</td>\n",
" <td>0.994571</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>11</th>\n",
" <td>P6S</td>\n",
" <td>0.001286</td>\n",
" <td>0.995567</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>43</th>\n",
" <td>mails permanents gd t fev23</td>\n",
" <td>0.000932</td>\n",
" <td>0.996289</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>34</th>\n",
" <td>fichiercontactstous_enssup_relance étudiants</td>\n",
" <td>0.000820</td>\n",
" <td>0.996925</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>42</th>\n",
" <td>mails intervenants mdla fev23</td>\n",
" <td>0.000522</td>\n",
" <td>0.997329</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>48</th>\n",
" <td>spec Jessica</td>\n",
" <td>0.000522</td>\n",
" <td>0.997733</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>7</th>\n",
" <td>Invit conf presse EUH : artistes + DRAC</td>\n",
" <td>0.000466</td>\n",
" <td>0.998094</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>10</th>\n",
" <td>P2S</td>\n",
" <td>0.000392</td>\n",
" <td>0.998397</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>39</th>\n",
" <td>liste intervenants mdla_newsletter temuda</td>\n",
" <td>0.000392</td>\n",
" <td>0.998701</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>12</th>\n",
" <td>Personnel du Grand T</td>\n",
" <td>0.000354</td>\n",
" <td>0.998975</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>44</th>\n",
" <td>mails permanents mdla fev23</td>\n",
" <td>0.000336</td>\n",
" <td>0.999235</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>32</th>\n",
" <td>fichier-sante-social-terminato</td>\n",
" <td>0.000317</td>\n",
" <td>0.999480</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>40</th>\n",
" <td>liste permanents mdla_newsletter temuda</td>\n",
" <td>0.000317</td>\n",
" <td>0.999726</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>17</th>\n",
" <td>Presse éducation</td>\n",
" <td>0.000131</td>\n",
" <td>0.999827</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>38</th>\n",
" <td>liste billetterie</td>\n",
" <td>0.000056</td>\n",
" <td>0.999870</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>6</th>\n",
" <td>Equipe com Grand T</td>\n",
" <td>0.000056</td>\n",
" <td>0.999913</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>45</th>\n",
" <td>news jumelage Bottière ajouts</td>\n",
" <td>0.000056</td>\n",
" <td>0.999957</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>24</th>\n",
" <td>emails yohann et tiphaine</td>\n",
" <td>0.000056</td>\n",
" <td>1.000000</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-23 10:48:47 +01:00
" target_name customer_id \\\n",
"9 Newsletter mensuelle 0.722068 \n",
"2 Blackliste 0.188669 \n",
"36 import opt-in février 2023 0.068141 \n",
"37 import opt-out fév 23 0.055016 \n",
"5 Contacts opt-out 20.10.22 0.047726 \n",
"21 Théâtre amateur 0.033054 \n",
"41 liste théâtre amateur 0.026641 \n",
"4 CP EUH 7 octobre 2022 0.026492 \n",
"47 spec Falaise - relance Mazut 0.020452 \n",
"35 fichierspectateurs_recreatrales 0.016052 \n",
"29 fichier news quartier grand t 0.007644 \n",
"33 fichier2 news quartier grand t 0.007383 \n",
"23 contacts amateurs 22-23 ok ok 0.006842 \n",
"22 amateurs incandescences oct 22 0.006730 \n",
"25 fichier invit Soir de fête 0.005984 \n",
"19 Spectateurs Ce qu'il faut dire 0.004959 \n",
"49 spec Mazut - tout 0.003300 \n",
"26 fichier invitation déjeuner chantier le 8 juin 0.003151 \n",
"0 3 M - spec du dimanche (toutes rep) 0.003132 \n",
"30 fichier op les dodos 21 nov global 0.003020 \n",
"8 Invitation protocolaire Les Fauves 0.002573 \n",
"1 3 M - spec du samedi (toutes rep) 0.002554 \n",
"28 fichier jumelage halveque 0.002498 \n",
"27 fichier invitations pro les enfants d'amazi 0.002256 \n",
"18 Spectateurs Acte(s) et sueurs 0.002237 \n",
"13 Presse 2021 0.002200 \n",
"20 Spectateurs-rices SpaceSongs 0.002069 \n",
"15 Presse Tous Terriens 0.002069 \n",
"46 nouveaux acheteurs 23-24 - relance Mazut 0.002032 \n",
"16 Presse communqiué agglo 0.001920 \n",
"14 Presse 22-23 0.001883 \n",
"31 fichier presse grand t 23-24export 0.001864 \n",
"3 Bottière - jumelage (contacts proches projet 0.001566 \n",
"11 P6S 0.001286 \n",
"43 mails permanents gd t fev23 0.000932 \n",
"34 fichiercontactstous_enssup_relance étudiants 0.000820 \n",
"42 mails intervenants mdla fev23 0.000522 \n",
"48 spec Jessica 0.000522 \n",
"7 Invit conf presse EUH : artistes + DRAC 0.000466 \n",
"10 P2S 0.000392 \n",
"39 liste intervenants mdla_newsletter temuda 0.000392 \n",
"12 Personnel du Grand T 0.000354 \n",
"44 mails permanents mdla fev23 0.000336 \n",
"32 fichier-sante-social-terminato 0.000317 \n",
"40 liste permanents mdla_newsletter temuda 0.000317 \n",
"17 Presse éducation 0.000131 \n",
"38 liste billetterie 0.000056 \n",
"6 Equipe com Grand T 0.000056 \n",
"45 news jumelage Bottière ajouts 0.000056 \n",
"24 emails yohann et tiphaine 0.000056 \n",
2024-03-13 23:24:38 +01:00
"\n",
2024-03-23 10:48:47 +01:00
" cumulative_customers \n",
"9 0.559228 \n",
"2 0.705348 \n",
"36 0.758122 \n",
"37 0.800731 \n",
"5 0.837694 \n",
"21 0.863294 \n",
"41 0.883927 \n",
"4 0.904444 \n",
"47 0.920284 \n",
"35 0.932715 \n",
"29 0.938635 \n",
"33 0.944353 \n",
"23 0.949652 \n",
"22 0.954864 \n",
"25 0.959499 \n",
"19 0.963340 \n",
"49 0.965896 \n",
"26 0.968336 \n",
"0 0.970762 \n",
"30 0.973101 \n",
"8 0.975093 \n",
"1 0.977071 \n",
"28 0.979006 \n",
"27 0.980753 \n",
"18 0.982486 \n",
"13 0.984190 \n",
"20 0.985792 \n",
"15 0.987395 \n",
"46 0.988969 \n",
"16 0.990456 \n",
"14 0.991914 \n",
"31 0.993358 \n",
"3 0.994571 \n",
"11 0.995567 \n",
"43 0.996289 \n",
"34 0.996925 \n",
"42 0.997329 \n",
"48 0.997733 \n",
"7 0.998094 \n",
"10 0.998397 \n",
"39 0.998701 \n",
"12 0.998975 \n",
"44 0.999235 \n",
"32 0.999480 \n",
"40 0.999726 \n",
"17 0.999827 \n",
"38 0.999870 \n",
"6 0.999913 \n",
"45 0.999957 \n",
"24 1.000000 "
2024-03-13 23:24:38 +01:00
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 13,
2024-03-13 23:24:38 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-23 10:48:47 +01:00
"print_main_target('10', 100)"
2024-03-13 23:24:38 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 14,
"id": "76c818a5-3c52-4d97-ac81-b7f3f89092bd",
"metadata": {
"scrolled": true
},
2024-03-13 23:24:38 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-23 10:48:47 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n",
"Nombre de ciblage : 124302\n",
"Nombre de client avec étiquette target : 62915\n"
2024-03-13 23:24:38 +01:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>50</th>\n",
" <td>Temp - DOUBLE OPTIN</td>\n",
" <td>0.410983</td>\n",
" <td>0.208018</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>31</th>\n",
" <td>Nombre de représentations = 1</td>\n",
" <td>0.330128</td>\n",
" <td>0.375111</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>65</th>\n",
" <td>primo-spectateurs (fidélité = 1)</td>\n",
" <td>0.247811</td>\n",
" <td>0.500539</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>0</th>\n",
" <td>Acheteurs réguliers (fidélité &gt;= 2)</td>\n",
" <td>0.126202</td>\n",
" <td>0.564416</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>34</th>\n",
" <td>Nombre de représentations = ou &gt; 4</td>\n",
" <td>0.085290</td>\n",
" <td>0.607585</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>2</th>\n",
" <td>Brochure janvier-juin 2023</td>\n",
" <td>0.076929</td>\n",
" <td>0.646522</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>30</th>\n",
" <td>NEWSLETTER</td>\n",
" <td>0.074370</td>\n",
" <td>0.684164</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>5</th>\n",
" <td>Demande brochure sept-déc 23</td>\n",
" <td>0.071541</td>\n",
" <td>0.720375</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>6</th>\n",
" <td>Demande brochure sept-déc 23 DEF</td>\n",
" <td>0.071461</td>\n",
" <td>0.756545</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>32</th>\n",
" <td>Nombre de représentations = 2</td>\n",
" <td>0.071016</td>\n",
" <td>0.792489</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>52</th>\n",
" <td>Waterproof_2023</td>\n",
" <td>0.065326</td>\n",
" <td>0.825554</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>46</th>\n",
" <td>Relance Poppée 08/09/23</td>\n",
" <td>0.057077</td>\n",
" <td>0.854443</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>28</th>\n",
" <td>Luisa Miller_ciblé</td>\n",
" <td>0.052404</td>\n",
" <td>0.880967</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
2024-03-23 10:48:47 +01:00
" <td>En dernier lieu</td>\n",
" <td>0.036287</td>\n",
" <td>0.899334</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>1</th>\n",
" <td>Bilan Carmen danse</td>\n",
" <td>0.030660</td>\n",
" <td>0.914853</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>33</th>\n",
" <td>Nombre de représentations = 3</td>\n",
" <td>0.029023</td>\n",
" <td>0.929543</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Inscription Newsletter</td>\n",
" <td>0.019026</td>\n",
" <td>0.939172</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>4</th>\n",
" <td>Code postal 56</td>\n",
" <td>0.015481</td>\n",
" <td>0.947008</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>23</th>\n",
" <td>Les nuits d'été - mail ciblé 13/10/23</td>\n",
" <td>0.011825</td>\n",
" <td>0.952994</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>42</th>\n",
" <td>Poppée 5, 7 et 8 octobre 23</td>\n",
" <td>0.008901</td>\n",
" <td>0.957499</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>43</th>\n",
" <td>Promo musique du monde Mawâl de la terre</td>\n",
" <td>0.008122</td>\n",
" <td>0.961610</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>38</th>\n",
" <td>PRESSE NATIONALE</td>\n",
" <td>0.006978</td>\n",
" <td>0.965141</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>20</th>\n",
" <td>L'Élixir d'amour 11, 13 mai</td>\n",
" <td>0.006390</td>\n",
" <td>0.968375</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>12</th>\n",
" <td>Florilège mail ciblé</td>\n",
" <td>0.006390</td>\n",
" <td>0.971609</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>22</th>\n",
" <td>Les Nuits d'été - avant spectacle</td>\n",
" <td>0.006056</td>\n",
" <td>0.974675</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>11</th>\n",
" <td>Enquête_Bal de Paris</td>\n",
" <td>0.005786</td>\n",
" <td>0.977603</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>27</th>\n",
" <td>Luisa Miller 23 mars</td>\n",
" <td>0.004339</td>\n",
" <td>0.979799</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>24</th>\n",
" <td>Les nuits d'été ajout - mail ciblé 13/10/23</td>\n",
" <td>0.003465</td>\n",
" <td>0.981553</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>13</th>\n",
" <td>GRANDS EVENEMENTS</td>\n",
" <td>0.003386</td>\n",
" <td>0.983267</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Poppée 3 octobre 23</td>\n",
" <td>0.002416</td>\n",
" <td>0.984489</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>62</th>\n",
" <td>liste mécénat et prospect</td>\n",
" <td>0.002273</td>\n",
" <td>0.985640</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>40</th>\n",
" <td>Poppée 1 octobre 23 uniquement</td>\n",
" <td>0.002162</td>\n",
" <td>0.986734</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>21</th>\n",
" <td>L'Élixir d'amour 5, 7, 9 mai</td>\n",
" <td>0.002098</td>\n",
" <td>0.987796</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>37</th>\n",
" <td>PRESSE LOCALE</td>\n",
" <td>0.001891</td>\n",
" <td>0.988753</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>26</th>\n",
" <td>Luisa Miller 23 et 25 mars</td>\n",
" <td>0.001764</td>\n",
" <td>0.989646</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>59</th>\n",
" <td>liste invites grand boum</td>\n",
" <td>0.001637</td>\n",
" <td>0.990475</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>16</th>\n",
" <td>INVITS PREMIERES</td>\n",
" <td>0.001494</td>\n",
" <td>0.991231</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>25</th>\n",
" <td>Luisa Miller 19 et 21 mars</td>\n",
" <td>0.001446</td>\n",
" <td>0.991963</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>19</th>\n",
" <td>Invités TNB</td>\n",
" <td>0.001208</td>\n",
" <td>0.992575</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>44</th>\n",
" <td>Protocole : REGIONAUX</td>\n",
" <td>0.001160</td>\n",
" <td>0.993162</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>54</th>\n",
" <td>assos danse</td>\n",
" <td>0.001160</td>\n",
" <td>0.993749</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>45</th>\n",
" <td>Protocole Objectif Choeurs</td>\n",
" <td>0.001160</td>\n",
" <td>0.994336</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>51</th>\n",
" <td>Titulaires cartes Opéra</td>\n",
" <td>0.000985</td>\n",
" <td>0.994835</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>7</th>\n",
" <td>Ecoles élémentaires Rennes</td>\n",
" <td>0.000954</td>\n",
" <td>0.995318</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>53</th>\n",
" <td>Zaïde 10 et 12 février</td>\n",
" <td>0.000827</td>\n",
" <td>0.995736</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>60</th>\n",
" <td>liste invités soirée 11 septembre mba</td>\n",
" <td>0.000795</td>\n",
" <td>0.996138</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>17</th>\n",
" <td>Info Tutelle</td>\n",
" <td>0.000795</td>\n",
" <td>0.996541</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>61</th>\n",
" <td>liste invités soirée 11 septembre mba 2</td>\n",
" <td>0.000763</td>\n",
" <td>0.996927</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>14</th>\n",
" <td>Hira Gasy Spectateurs</td>\n",
" <td>0.000731</td>\n",
" <td>0.997297</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>48</th>\n",
" <td>Spectateurs Passion selon Brockes</td>\n",
" <td>0.000636</td>\n",
" <td>0.997619</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>47</th>\n",
" <td>Spectateurs Oratorios pour Passion</td>\n",
" <td>0.000525</td>\n",
" <td>0.997884</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>36</th>\n",
" <td>PERSONNEL OPERA</td>\n",
" <td>0.000493</td>\n",
" <td>0.998134</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>57</th>\n",
" <td>invités représentation du 1er octobre2023</td>\n",
" <td>0.000445</td>\n",
" <td>0.998359</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>64</th>\n",
" <td>mécène - vernissage frac</td>\n",
" <td>0.000429</td>\n",
" <td>0.998576</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>15</th>\n",
" <td>INVITS CONF PRESSE ETE 2020</td>\n",
" <td>0.000381</td>\n",
" <td>0.998769</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>8</th>\n",
" <td>Elixir d'amour 15/04 14h30</td>\n",
" <td>0.000350</td>\n",
" <td>0.998946</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>9</th>\n",
" <td>Elixir d'amour 15/04 17h30</td>\n",
" <td>0.000350</td>\n",
" <td>0.999123</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>39</th>\n",
" <td>PRESSE NUMERIQUE</td>\n",
" <td>0.000334</td>\n",
" <td>0.999292</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>55</th>\n",
" <td>invitations représentation 3 octobre2023</td>\n",
" <td>0.000334</td>\n",
" <td>0.999461</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>58</th>\n",
" <td>liste chargé.e.s de communication</td>\n",
" <td>0.000334</td>\n",
" <td>0.999630</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>29</th>\n",
" <td>MECENES</td>\n",
" <td>0.000207</td>\n",
" <td>0.999735</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>56</th>\n",
" <td>invités représentation 1er octobre2023</td>\n",
" <td>0.000207</td>\n",
" <td>0.999839</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>35</th>\n",
" <td>OPERA</td>\n",
" <td>0.000159</td>\n",
" <td>0.999920</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>63</th>\n",
" <td>liste mécénat et prospect 2 - erreur mail</td>\n",
" <td>0.000064</td>\n",
" <td>0.999952</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>49</th>\n",
" <td>TEST ENVOI</td>\n",
" <td>0.000064</td>\n",
" <td>0.999984</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>3</th>\n",
" <td>Choeur ouvert 22-23</td>\n",
" <td>0.000032</td>\n",
" <td>1.000000</td>\n",
2024-03-13 23:24:38 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-23 10:48:47 +01:00
" target_name customer_id \\\n",
"50 Temp - DOUBLE OPTIN 0.410983 \n",
"31 Nombre de représentations = 1 0.330128 \n",
"65 primo-spectateurs (fidélité = 1) 0.247811 \n",
"0 Acheteurs réguliers (fidélité >= 2) 0.126202 \n",
"34 Nombre de représentations = ou > 4 0.085290 \n",
"2 Brochure janvier-juin 2023 0.076929 \n",
"30 NEWSLETTER 0.074370 \n",
"5 Demande brochure sept-déc 23 0.071541 \n",
"6 Demande brochure sept-déc 23 DEF 0.071461 \n",
"32 Nombre de représentations = 2 0.071016 \n",
"52 Waterproof_2023 0.065326 \n",
"46 Relance Poppée 08/09/23 0.057077 \n",
"28 Luisa Miller_ciblé 0.052404 \n",
"10 En dernier lieu 0.036287 \n",
"1 Bilan Carmen danse 0.030660 \n",
"33 Nombre de représentations = 3 0.029023 \n",
"18 Inscription Newsletter 0.019026 \n",
"4 Code postal 56 0.015481 \n",
"23 Les nuits d'été - mail ciblé 13/10/23 0.011825 \n",
"42 Poppée 5, 7 et 8 octobre 23 0.008901 \n",
"43 Promo musique du monde Mawâl de la terre 0.008122 \n",
"38 PRESSE NATIONALE 0.006978 \n",
"20 L'Élixir d'amour 11, 13 mai 0.006390 \n",
"12 Florilège mail ciblé 0.006390 \n",
"22 Les Nuits d'été - avant spectacle 0.006056 \n",
"11 Enquête_Bal de Paris 0.005786 \n",
"27 Luisa Miller 23 mars 0.004339 \n",
"24 Les nuits d'été ajout - mail ciblé 13/10/23 0.003465 \n",
"13 GRANDS EVENEMENTS 0.003386 \n",
"41 Poppée 3 octobre 23 0.002416 \n",
"62 liste mécénat et prospect 0.002273 \n",
"40 Poppée 1 octobre 23 uniquement 0.002162 \n",
"21 L'Élixir d'amour 5, 7, 9 mai 0.002098 \n",
"37 PRESSE LOCALE 0.001891 \n",
"26 Luisa Miller 23 et 25 mars 0.001764 \n",
"59 liste invites grand boum 0.001637 \n",
"16 INVITS PREMIERES 0.001494 \n",
"25 Luisa Miller 19 et 21 mars 0.001446 \n",
"19 Invités TNB 0.001208 \n",
"44 Protocole : REGIONAUX 0.001160 \n",
"54 assos danse 0.001160 \n",
"45 Protocole Objectif Choeurs 0.001160 \n",
"51 Titulaires cartes Opéra 0.000985 \n",
"7 Ecoles élémentaires Rennes 0.000954 \n",
"53 Zaïde 10 et 12 février 0.000827 \n",
"60 liste invités soirée 11 septembre mba 0.000795 \n",
"17 Info Tutelle 0.000795 \n",
"61 liste invités soirée 11 septembre mba 2 0.000763 \n",
"14 Hira Gasy Spectateurs 0.000731 \n",
"48 Spectateurs Passion selon Brockes 0.000636 \n",
"47 Spectateurs Oratorios pour Passion 0.000525 \n",
"36 PERSONNEL OPERA 0.000493 \n",
"57 invités représentation du 1er octobre2023 0.000445 \n",
"64 mécène - vernissage frac 0.000429 \n",
"15 INVITS CONF PRESSE ETE 2020 0.000381 \n",
"8 Elixir d'amour 15/04 14h30 0.000350 \n",
"9 Elixir d'amour 15/04 17h30 0.000350 \n",
"39 PRESSE NUMERIQUE 0.000334 \n",
"55 invitations représentation 3 octobre2023 0.000334 \n",
"58 liste chargé.e.s de communication 0.000334 \n",
"29 MECENES 0.000207 \n",
"56 invités représentation 1er octobre2023 0.000207 \n",
"35 OPERA 0.000159 \n",
"63 liste mécénat et prospect 2 - erreur mail 0.000064 \n",
"49 TEST ENVOI 0.000064 \n",
"3 Choeur ouvert 22-23 0.000032 \n",
2024-03-13 23:24:38 +01:00
"\n",
2024-03-23 10:48:47 +01:00
" cumulative_customers \n",
"50 0.208018 \n",
"31 0.375111 \n",
"65 0.500539 \n",
"0 0.564416 \n",
"34 0.607585 \n",
"2 0.646522 \n",
"30 0.684164 \n",
"5 0.720375 \n",
"6 0.756545 \n",
"32 0.792489 \n",
"52 0.825554 \n",
"46 0.854443 \n",
"28 0.880967 \n",
"10 0.899334 \n",
"1 0.914853 \n",
"33 0.929543 \n",
"18 0.939172 \n",
"4 0.947008 \n",
"23 0.952994 \n",
"42 0.957499 \n",
"43 0.961610 \n",
"38 0.965141 \n",
"20 0.968375 \n",
"12 0.971609 \n",
"22 0.974675 \n",
"11 0.977603 \n",
"27 0.979799 \n",
"24 0.981553 \n",
"13 0.983267 \n",
"41 0.984489 \n",
"62 0.985640 \n",
"40 0.986734 \n",
"21 0.987796 \n",
"37 0.988753 \n",
"26 0.989646 \n",
"59 0.990475 \n",
"16 0.991231 \n",
"25 0.991963 \n",
"19 0.992575 \n",
"44 0.993162 \n",
"54 0.993749 \n",
"45 0.994336 \n",
"51 0.994835 \n",
"7 0.995318 \n",
"53 0.995736 \n",
"60 0.996138 \n",
"17 0.996541 \n",
"61 0.996927 \n",
"14 0.997297 \n",
"48 0.997619 \n",
"47 0.997884 \n",
"36 0.998134 \n",
"57 0.998359 \n",
"64 0.998576 \n",
"15 0.998769 \n",
"8 0.998946 \n",
"9 0.999123 \n",
"39 0.999292 \n",
"55 0.999461 \n",
"58 0.999630 \n",
"29 0.999735 \n",
"56 0.999839 \n",
"35 0.999920 \n",
"63 0.999952 \n",
"49 0.999984 \n",
"3 1.000000 "
2024-03-13 23:24:38 +01:00
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 14,
2024-03-13 23:24:38 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-23 10:48:47 +01:00
"print_main_target('11', 100)\n"
2024-02-26 22:47:36 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 15,
"id": "603b11e4-5d76-4699-a1b2-e795929edc04",
2024-03-13 23:24:38 +01:00
"metadata": {
2024-03-23 10:48:47 +01:00
"scrolled": true
2024-03-13 23:24:38 +01:00
},
2024-02-26 22:47:36 +01:00
"outputs": [
2024-02-25 18:33:24 +01:00
{
2024-02-25 23:53:10 +01:00
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-23 10:48:47 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n",
"Nombre de ciblage : 1409140\n",
"Nombre de client avec étiquette target : 242726\n"
2024-02-25 18:33:24 +01:00
]
2024-02-25 23:53:10 +01:00
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-23 10:48:47 +01:00
" <th>target_name</th>\n",
2024-02-25 23:53:10 +01:00
" <th>customer_id</th>\n",
2024-03-23 10:48:47 +01:00
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>CAT/buit all</td>\n",
" <td>0.755387</td>\n",
" <td>0.130116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>274</th>\n",
" <td>lista bbdd opt-in</td>\n",
" <td>0.510559</td>\n",
" <td>0.218061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>157</th>\n",
" <td>Obren mails</td>\n",
" <td>0.398466</td>\n",
" <td>0.286697</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>Participantes por primera vez</td>\n",
" <td>0.376012</td>\n",
" <td>0.351465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>267</th>\n",
" <td>buit all</td>\n",
" <td>0.355907</td>\n",
" <td>0.412771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>Obren mails CAT</td>\n",
" <td>0.302773</td>\n",
" <td>0.464924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>ES all</td>\n",
" <td>0.218934</td>\n",
" <td>0.502636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>Festivals 19-21-22-23 CAT</td>\n",
" <td>0.213348</td>\n",
" <td>0.539385</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>Comptes cashless - tots</td>\n",
" <td>0.176883</td>\n",
" <td>0.569853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>allcomptespersonalsambnom</td>\n",
" <td>0.176883</td>\n",
" <td>0.600321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>allcomptespersonals</td>\n",
" <td>0.176878</td>\n",
" <td>0.630789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>268</th>\n",
" <td>compradores habituales</td>\n",
" <td>0.155327</td>\n",
" <td>0.657544</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>Compradors CE2023</td>\n",
" <td>0.135478</td>\n",
" <td>0.680881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>All CE2023 CAT</td>\n",
" <td>0.116329</td>\n",
" <td>0.700918</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>Obren mails ES</td>\n",
" <td>0.089224</td>\n",
" <td>0.716287</td>\n",
" </tr>\n",
" <tr>\n",
" <th>263</th>\n",
" <td>all unsubscribed 09.03.23</td>\n",
" <td>0.087766</td>\n",
" <td>0.731405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>All unsubscribed 09.03.23</td>\n",
" <td>0.087766</td>\n",
" <td>0.746523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>Festivals 19-21-22-23 ES</td>\n",
" <td>0.077594</td>\n",
" <td>0.759888</td>\n",
" </tr>\n",
" <tr>\n",
" <th>273</th>\n",
" <td>lista bbdd opt out</td>\n",
" <td>0.076214</td>\n",
" <td>0.773016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>171</th>\n",
" <td>Primer control de acceso 07/07/23</td>\n",
" <td>0.074710</td>\n",
" <td>0.785885</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>All CE2023 ES</td>\n",
" <td>0.063673</td>\n",
" <td>0.796853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>181</th>\n",
" <td>Push Joan Miquel Oliver CAT</td>\n",
" <td>0.056174</td>\n",
" <td>0.806529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200</th>\n",
" <td>Segments Joan Miquel Oliver AX</td>\n",
" <td>0.049978</td>\n",
" <td>0.815138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>204</th>\n",
" <td>Segments jajas</td>\n",
" <td>0.039996</td>\n",
" <td>0.822027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>Segments Fatoumata PowerBI</td>\n",
" <td>0.037071</td>\n",
" <td>0.828412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>278</th>\n",
" <td>segments fatoumata powerbi</td>\n",
" <td>0.037071</td>\n",
" <td>0.834798</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>Divendres CE2023</td>\n",
" <td>0.036547</td>\n",
" <td>0.841093</td>\n",
" </tr>\n",
" <tr>\n",
" <th>205</th>\n",
" <td>Segments jajas CAT</td>\n",
" <td>0.032349</td>\n",
" <td>0.846665</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>Dijous CE2023</td>\n",
" <td>0.028448</td>\n",
" <td>0.851565</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>Dissabte CE2023</td>\n",
" <td>0.028168</td>\n",
" <td>0.856417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201</th>\n",
" <td>Segments Joan Miquel Oliver PowerBI</td>\n",
" <td>0.027426</td>\n",
" <td>0.861142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>279</th>\n",
" <td>segments joan miquel oliver powerbi</td>\n",
" <td>0.027426</td>\n",
" <td>0.865866</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>Push Fatoumata CAT</td>\n",
" <td>0.027253</td>\n",
" <td>0.870560</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93</th>\n",
" <td>EN all</td>\n",
" <td>0.025675</td>\n",
" <td>0.874983</td>\n",
" </tr>\n",
" <tr>\n",
" <th>203</th>\n",
" <td>Segments Pinpilincinos PowerBI</td>\n",
" <td>0.020851</td>\n",
" <td>0.878574</td>\n",
" </tr>\n",
" <tr>\n",
" <th>280</th>\n",
" <td>segments pinpilincinos powerbi</td>\n",
" <td>0.020851</td>\n",
" <td>0.882166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>Dimecres CE2023</td>\n",
" <td>0.020702</td>\n",
" <td>0.885732</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>Divendres CE2023 CAT</td>\n",
" <td>0.020463</td>\n",
" <td>0.889257</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>All CE2023 buit</td>\n",
" <td>0.020002</td>\n",
" <td>0.892702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276</th>\n",
" <td>regalentradesrondes</td>\n",
" <td>0.019458</td>\n",
" <td>0.896054</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>Assistents concerts last year</td>\n",
" <td>0.019458</td>\n",
" <td>0.899405</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>183</th>\n",
" <td>Push Pinpilincinos CAT</td>\n",
" <td>0.018708</td>\n",
" <td>0.902628</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>266</th>\n",
" <td>barres comedy festival</td>\n",
" <td>0.018066</td>\n",
" <td>0.905740</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>44</th>\n",
" <td>Barres Comedy festival</td>\n",
" <td>0.018066</td>\n",
" <td>0.908851</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>270</th>\n",
" <td>in risus</td>\n",
" <td>0.018033</td>\n",
" <td>0.911958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Compradors In Risus</td>\n",
" <td>0.018033</td>\n",
" <td>0.915064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>182</th>\n",
" <td>Push Joan Miquel Oliver ES</td>\n",
" <td>0.017205</td>\n",
" <td>0.918027</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-02-26 22:47:36 +01:00
" <th>22</th>\n",
2024-03-23 10:48:47 +01:00
" <td>All abo wkd-2dies CE2023</td>\n",
" <td>0.017097</td>\n",
" <td>0.920972</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>84</th>\n",
" <td>Dissabte CE2023 CAT</td>\n",
" <td>0.016080</td>\n",
" <td>0.923742</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>42</th>\n",
" <td>Assistents concerts last year CAT</td>\n",
" <td>0.015878</td>\n",
" <td>0.926477</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>9</th>\n",
" <td>Abonament Weekend ST</td>\n",
" <td>0.015400</td>\n",
" <td>0.929130</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>176</th>\n",
" <td>Push Al·lèrgiques CAT</td>\n",
" <td>0.014642</td>\n",
" <td>0.931652</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>74</th>\n",
" <td>Dijous CE2023 CAT</td>\n",
" <td>0.014139</td>\n",
" <td>0.934087</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>196</th>\n",
" <td>Segments Al·lèrgiques AX</td>\n",
" <td>0.014041</td>\n",
" <td>0.936506</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>180</th>\n",
" <td>Push Fatoumata ES</td>\n",
" <td>0.012158</td>\n",
" <td>0.938600</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>79</th>\n",
" <td>Dimecres CE2023 CAT</td>\n",
" <td>0.010287</td>\n",
" <td>0.940372</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>23</th>\n",
" <td>All abo wkd-2dies CE2023 CAT</td>\n",
" <td>0.009888</td>\n",
" <td>0.942075</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>91</th>\n",
" <td>Divendres CE2023 ES</td>\n",
" <td>0.009500</td>\n",
" <td>0.943712</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>112</th>\n",
" <td>Funzo &amp; Baby Loud CT2023</td>\n",
" <td>0.009064</td>\n",
" <td>0.945273</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-02-26 22:47:36 +01:00
" <th>17</th>\n",
2024-03-23 10:48:47 +01:00
" <td>All CE2023 EN</td>\n",
" <td>0.008232</td>\n",
" <td>0.946691</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>76</th>\n",
" <td>Dijous CE2023 ES</td>\n",
" <td>0.008001</td>\n",
" <td>0.948069</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>20</th>\n",
" <td>All abo full-4dies CE2023</td>\n",
" <td>0.007980</td>\n",
" <td>0.949444</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>184</th>\n",
" <td>Push Pinpilincinos ES</td>\n",
" <td>0.007535</td>\n",
" <td>0.950742</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
2024-03-23 10:48:47 +01:00
" <tr>\n",
" <th>206</th>\n",
" <td>Segments jajas ES</td>\n",
" <td>0.007412</td>\n",
" <td>0.952018</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>115</th>\n",
" <td>Funzo &amp; Baby Loud CT2023 buit</td>\n",
" <td>0.007321</td>\n",
" <td>0.953279</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
2024-03-23 10:48:47 +01:00
" <tr>\n",
" <th>236</th>\n",
" <td>The Tyets CT2024 16.11</td>\n",
" <td>0.006798</td>\n",
" <td>0.954450</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>Dissabte CE2023 ES</td>\n",
" <td>0.006691</td>\n",
" <td>0.955603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Abonament Full ST</td>\n",
" <td>0.006674</td>\n",
" <td>0.956752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <td>Obren mails EN</td>\n",
" <td>0.006468</td>\n",
" <td>0.957867</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-02-26 22:47:36 +01:00
" <th>0</th>\n",
2024-03-23 10:48:47 +01:00
" <td>31 FAM CT2023</td>\n",
" <td>0.006468</td>\n",
" <td>0.958981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202</th>\n",
" <td>Segments Pinpilincinos AX</td>\n",
" <td>0.006279</td>\n",
" <td>0.960062</td>\n",
" </tr>\n",
" <tr>\n",
" <th>211</th>\n",
" <td>Sen Senra CH2024</td>\n",
" <td>0.006007</td>\n",
" <td>0.961097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>Dimecres CE2023 ES</td>\n",
" <td>0.005735</td>\n",
" <td>0.962085</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>All abo full-4dies CE2023 CAT</td>\n",
" <td>0.005451</td>\n",
" <td>0.963024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>239</th>\n",
" <td>The Tyets CT2024 16.11 buit</td>\n",
" <td>0.005414</td>\n",
" <td>0.963956</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>Dijous CE2023 buit</td>\n",
" <td>0.005257</td>\n",
" <td>0.964862</td>\n",
" </tr>\n",
" <tr>\n",
" <th>193</th>\n",
" <td>SF 7 anys o més</td>\n",
" <td>0.005203</td>\n",
" <td>0.965758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>281</th>\n",
" <td>sf7anysomes</td>\n",
" <td>0.005203</td>\n",
" <td>0.966654</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>All abo wkd-2dies CE2023 ES</td>\n",
" <td>0.005034</td>\n",
" <td>0.967521</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>Segments Al·lèrgiques ST</td>\n",
" <td>0.004866</td>\n",
" <td>0.968359</td>\n",
" </tr>\n",
" <tr>\n",
" <th>277</th>\n",
" <td>segments al·lèrgiques st</td>\n",
" <td>0.004866</td>\n",
" <td>0.969198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>260</th>\n",
" <td>XXS Comedy</td>\n",
" <td>0.004837</td>\n",
" <td>0.970031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>92</th>\n",
" <td>Divendres CE2023 buit</td>\n",
" <td>0.004828</td>\n",
" <td>0.970862</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>Dissabte CE2023 buit</td>\n",
" <td>0.004614</td>\n",
" <td>0.971657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>215</th>\n",
" <td>Sen Senra CH2024 buit</td>\n",
" <td>0.004511</td>\n",
" <td>0.972434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>Dimecres CE2023 buit</td>\n",
" <td>0.004421</td>\n",
" <td>0.973196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Abo 3 dies CE2023</td>\n",
" <td>0.004272</td>\n",
" <td>0.973932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>Segments Fatoumata AX</td>\n",
" <td>0.004219</td>\n",
" <td>0.974658</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>Convidats pro</td>\n",
" <td>0.003980</td>\n",
" <td>0.975344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269</th>\n",
" <td>identified_contacts</td>\n",
" <td>0.003976</td>\n",
" <td>0.976029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>Push Al·lèrgiques ES</td>\n",
" <td>0.003885</td>\n",
" <td>0.976698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254</th>\n",
" <td>Viva Suecia CH2024</td>\n",
" <td>0.003770</td>\n",
" <td>0.977347</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220</th>\n",
" <td>Sidonie CH2024</td>\n",
" <td>0.003683</td>\n",
" <td>0.977982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194</th>\n",
" <td>SF 7 anys o més CAT</td>\n",
" <td>0.003531</td>\n",
" <td>0.978590</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>Assistents concerts last year ES</td>\n",
" <td>0.003518</td>\n",
" <td>0.979196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232</th>\n",
" <td>The Tyets CT2024 15.11</td>\n",
" <td>0.003502</td>\n",
" <td>0.979799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>283</th>\n",
" <td>usuaris cruïlla green day</td>\n",
" <td>0.003242</td>\n",
" <td>0.980358</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>Usuaris Cruïlla Green Day</td>\n",
" <td>0.003242</td>\n",
" <td>0.980916</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-23 10:48:47 +01:00
" <td>31 FAM CT2023 CAT</td>\n",
" <td>0.003176</td>\n",
" <td>0.981463</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>Cupido CH2024</td>\n",
" <td>0.003148</td>\n",
" <td>0.982005</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-23 10:48:47 +01:00
" target_name customer_id cumulative_customers\n",
"45 CAT/buit all 0.755387 0.130116\n",
"274 lista bbdd opt-in 0.510559 0.218061\n",
"157 Obren mails 0.398466 0.286697\n",
"165 Participantes por primera vez 0.376012 0.351465\n",
"267 buit all 0.355907 0.412771\n",
"158 Obren mails CAT 0.302773 0.464924\n",
"94 ES all 0.218934 0.502636\n",
"105 Festivals 19-21-22-23 CAT 0.213348 0.539385\n",
"51 Comptes cashless - tots 0.176883 0.569853\n",
"265 allcomptespersonalsambnom 0.176883 0.600321\n",
"264 allcomptespersonals 0.176878 0.630789\n",
"268 compradores habituales 0.155327 0.657544\n",
"49 Compradors CE2023 0.135478 0.680881\n",
"16 All CE2023 CAT 0.116329 0.700918\n",
"160 Obren mails ES 0.089224 0.716287\n",
"263 all unsubscribed 09.03.23 0.087766 0.731405\n",
"27 All unsubscribed 09.03.23 0.087766 0.746523\n",
"106 Festivals 19-21-22-23 ES 0.077594 0.759888\n",
"273 lista bbdd opt out 0.076214 0.773016\n",
"171 Primer control de acceso 07/07/23 0.074710 0.785885\n",
"18 All CE2023 ES 0.063673 0.796853\n",
"181 Push Joan Miquel Oliver CAT 0.056174 0.806529\n",
"200 Segments Joan Miquel Oliver AX 0.049978 0.815138\n",
"204 Segments jajas 0.039996 0.822027\n",
"199 Segments Fatoumata PowerBI 0.037071 0.828412\n",
"278 segments fatoumata powerbi 0.037071 0.834798\n",
"88 Divendres CE2023 0.036547 0.841093\n",
"205 Segments jajas CAT 0.032349 0.846665\n",
"73 Dijous CE2023 0.028448 0.851565\n",
"83 Dissabte CE2023 0.028168 0.856417\n",
"201 Segments Joan Miquel Oliver PowerBI 0.027426 0.861142\n",
"279 segments joan miquel oliver powerbi 0.027426 0.865866\n",
"178 Push Fatoumata CAT 0.027253 0.870560\n",
"93 EN all 0.025675 0.874983\n",
"203 Segments Pinpilincinos PowerBI 0.020851 0.878574\n",
"280 segments pinpilincinos powerbi 0.020851 0.882166\n",
"78 Dimecres CE2023 0.020702 0.885732\n",
"89 Divendres CE2023 CAT 0.020463 0.889257\n",
"19 All CE2023 buit 0.020002 0.892702\n",
"276 regalentradesrondes 0.019458 0.896054\n",
"41 Assistents concerts last year 0.019458 0.899405\n",
"183 Push Pinpilincinos CAT 0.018708 0.902628\n",
"266 barres comedy festival 0.018066 0.905740\n",
"44 Barres Comedy festival 0.018066 0.908851\n",
"270 in risus 0.018033 0.911958\n",
"50 Compradors In Risus 0.018033 0.915064\n",
"182 Push Joan Miquel Oliver ES 0.017205 0.918027\n",
"22 All abo wkd-2dies CE2023 0.017097 0.920972\n",
"84 Dissabte CE2023 CAT 0.016080 0.923742\n",
"42 Assistents concerts last year CAT 0.015878 0.926477\n",
"9 Abonament Weekend ST 0.015400 0.929130\n",
"176 Push Al·lèrgiques CAT 0.014642 0.931652\n",
"74 Dijous CE2023 CAT 0.014139 0.934087\n",
"196 Segments Al·lèrgiques AX 0.014041 0.936506\n",
"180 Push Fatoumata ES 0.012158 0.938600\n",
"79 Dimecres CE2023 CAT 0.010287 0.940372\n",
"23 All abo wkd-2dies CE2023 CAT 0.009888 0.942075\n",
"91 Divendres CE2023 ES 0.009500 0.943712\n",
"112 Funzo & Baby Loud CT2023 0.009064 0.945273\n",
"17 All CE2023 EN 0.008232 0.946691\n",
"76 Dijous CE2023 ES 0.008001 0.948069\n",
"20 All abo full-4dies CE2023 0.007980 0.949444\n",
"184 Push Pinpilincinos ES 0.007535 0.950742\n",
"206 Segments jajas ES 0.007412 0.952018\n",
"115 Funzo & Baby Loud CT2023 buit 0.007321 0.953279\n",
"236 The Tyets CT2024 16.11 0.006798 0.954450\n",
"86 Dissabte CE2023 ES 0.006691 0.955603\n",
"7 Abonament Full ST 0.006674 0.956752\n",
"159 Obren mails EN 0.006468 0.957867\n",
"0 31 FAM CT2023 0.006468 0.958981\n",
"202 Segments Pinpilincinos AX 0.006279 0.960062\n",
"211 Sen Senra CH2024 0.006007 0.961097\n",
"81 Dimecres CE2023 ES 0.005735 0.962085\n",
"21 All abo full-4dies CE2023 CAT 0.005451 0.963024\n",
"239 The Tyets CT2024 16.11 buit 0.005414 0.963956\n",
"77 Dijous CE2023 buit 0.005257 0.964862\n",
"193 SF 7 anys o més 0.005203 0.965758\n",
"281 sf7anysomes 0.005203 0.966654\n",
"25 All abo wkd-2dies CE2023 ES 0.005034 0.967521\n",
"197 Segments Al·lèrgiques ST 0.004866 0.968359\n",
"277 segments al·lèrgiques st 0.004866 0.969198\n",
"260 XXS Comedy 0.004837 0.970031\n",
"92 Divendres CE2023 buit 0.004828 0.970862\n",
"87 Dissabte CE2023 buit 0.004614 0.971657\n",
"215 Sen Senra CH2024 buit 0.004511 0.972434\n",
"82 Dimecres CE2023 buit 0.004421 0.973196\n",
"5 Abo 3 dies CE2023 0.004272 0.973932\n",
"198 Segments Fatoumata AX 0.004219 0.974658\n",
"66 Convidats pro 0.003980 0.975344\n",
"269 identified_contacts 0.003976 0.976029\n",
"177 Push Al·lèrgiques ES 0.003885 0.976698\n",
"254 Viva Suecia CH2024 0.003770 0.977347\n",
"220 Sidonie CH2024 0.003683 0.977982\n",
"194 SF 7 anys o més CAT 0.003531 0.978590\n",
"43 Assistents concerts last year ES 0.003518 0.979196\n",
"232 The Tyets CT2024 15.11 0.003502 0.979799\n",
"283 usuaris cruïlla green day 0.003242 0.980358\n",
"240 Usuaris Cruïlla Green Day 0.003242 0.980916\n",
"1 31 FAM CT2023 CAT 0.003176 0.981463\n",
"68 Cupido CH2024 0.003148 0.982005"
2024-02-25 23:53:10 +01:00
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 15,
2024-02-25 23:53:10 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-23 10:48:47 +01:00
"print_main_target('12', 100)\n"
2024-02-25 23:53:10 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": 17,
"id": "a115ebcf-4488-47f3-9d7e-75a1fca52f0f",
"metadata": {
"scrolled": true
},
2024-02-25 23:53:10 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-23 10:48:47 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n",
"Nombre de ciblage : 779658\n",
"Nombre de client avec étiquette target : 240541\n"
2024-02-25 23:53:10 +01:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-23 10:48:47 +01:00
" <th>target_name</th>\n",
2024-02-25 23:53:10 +01:00
" <th>customer_id</th>\n",
2024-03-23 10:48:47 +01:00
" <th>cumulative_customers</th>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>11</th>\n",
" <td>BDS 17/18/19</td>\n",
" <td>0.371483</td>\n",
" <td>0.114611</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>28</th>\n",
" <td>Cible non-acheteurs franciliens</td>\n",
" <td>0.208871</td>\n",
" <td>0.179052</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>FHU 2018 - Acheteurs bds</td>\n",
" <td>0.184817</td>\n",
" <td>0.236071</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-02-26 22:47:36 +01:00
" <th>14</th>\n",
2024-03-23 10:48:47 +01:00
" <td>BDS FHU18</td>\n",
" <td>0.184505</td>\n",
" <td>0.292995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>BDS 2021</td>\n",
" <td>0.183229</td>\n",
" <td>0.349525</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>importer_huma</td>\n",
" <td>0.183229</td>\n",
" <td>0.406055</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>BDS FHU21</td>\n",
" <td>0.183224</td>\n",
" <td>0.462584</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>BCOM 17/18/19</td>\n",
" <td>0.162247</td>\n",
" <td>0.512640</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-02-26 22:47:36 +01:00
" <th>16</th>\n",
2024-03-23 10:48:47 +01:00
" <td>BDS FHU23 VDéf</td>\n",
" <td>0.152298</td>\n",
" <td>0.559627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>BDS 2022</td>\n",
" <td>0.139095</td>\n",
" <td>0.602541</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Cible non-acheteurs provinciaux</td>\n",
" <td>0.136991</td>\n",
" <td>0.644806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>BCOM 2023 - PASS 3 JOURS</td>\n",
" <td>0.106946</td>\n",
" <td>0.677801</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>55</th>\n",
" <td>PROSPECT CONTACT -35ANS</td>\n",
" <td>0.103483</td>\n",
" <td>0.709728</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>73</th>\n",
" <td>billets bds au 11/09</td>\n",
" <td>0.077604</td>\n",
" <td>0.733670</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>64</th>\n",
" <td>Pass 3J</td>\n",
" <td>0.070337</td>\n",
" <td>0.755371</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>9</th>\n",
" <td>BCOM FHU18</td>\n",
" <td>0.064214</td>\n",
" <td>0.775182</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>6</th>\n",
" <td>BCOM 2018</td>\n",
" <td>0.059470</td>\n",
" <td>0.793530</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>72</th>\n",
" <td>bds 2022 +50 ans</td>\n",
" <td>0.052519</td>\n",
" <td>0.809733</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>10</th>\n",
" <td>BCOM FHU19</td>\n",
" <td>0.045593</td>\n",
" <td>0.823800</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-02-26 22:47:36 +01:00
" <th>36</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Festivaliers WEB 2020 - plein tarif</td>\n",
" <td>0.045410</td>\n",
" <td>0.837810</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>19</th>\n",
" <td>CAMPING</td>\n",
" <td>0.041178</td>\n",
" <td>0.850514</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>41</th>\n",
" <td>Inscriptions newsletters (depuis 2019)</td>\n",
" <td>0.039598</td>\n",
" <td>0.862731</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>56</th>\n",
" <td>PROSPECT FHU NORMANDIE 2021 #1</td>\n",
" <td>0.038925</td>\n",
" <td>0.874740</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>25</th>\n",
" <td>Camping FHU22</td>\n",
" <td>0.033716</td>\n",
" <td>0.885142</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>3</th>\n",
" <td>ACTIVATION BDS 24/08</td>\n",
" <td>0.031862</td>\n",
" <td>0.894972</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>75</th>\n",
" <td>data bcom 2023 - 15.06.23</td>\n",
" <td>0.026594</td>\n",
" <td>0.903177</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>20</th>\n",
" <td>CAMPING 2021 #1</td>\n",
" <td>0.022154</td>\n",
" <td>0.910012</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>21</th>\n",
" <td>CAMPING 24/08</td>\n",
" <td>0.022150</td>\n",
" <td>0.916846</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>58</th>\n",
" <td>PROSPECT FHU NORMANDIE 2021 #3</td>\n",
" <td>0.020579</td>\n",
" <td>0.923195</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>54</th>\n",
" <td>PROSPECT ACHETEURS FHU23 POUR FHU NORMANDIE 2023</td>\n",
" <td>0.017901</td>\n",
" <td>0.928717</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>85</th>\n",
" <td>prospect acheteurs fhu23_pour_fhu rouen 2023</td>\n",
" <td>0.017901</td>\n",
" <td>0.934240</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>63</th>\n",
" <td>PROSPECT NORMANDIE</td>\n",
" <td>0.014118</td>\n",
" <td>0.938596</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>60</th>\n",
" <td>PROSPECT FHU NORMANDIE 2022 BDS</td>\n",
" <td>0.013777</td>\n",
" <td>0.942847</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>87</th>\n",
" <td>prospect bds fhu normandie</td>\n",
" <td>0.013777</td>\n",
" <td>0.947097</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>44</th>\n",
" <td>PARKING 2021 #1</td>\n",
" <td>0.012484</td>\n",
" <td>0.950949</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>45</th>\n",
" <td>PARKING 24/08</td>\n",
" <td>0.012081</td>\n",
" <td>0.954676</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>42</th>\n",
" <td>PARKING - PORTE B</td>\n",
" <td>0.011848</td>\n",
" <td>0.958332</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>43</th>\n",
" <td>PARKING - PORTE J</td>\n",
" <td>0.011736</td>\n",
" <td>0.961953</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>86</th>\n",
" <td>prospect bcom fhu normandie</td>\n",
" <td>0.010709</td>\n",
" <td>0.965257</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>59</th>\n",
" <td>PROSPECT FHU NORMANDIE 2022 BCOM</td>\n",
" <td>0.010709</td>\n",
" <td>0.968561</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>50</th>\n",
" <td>PASS 3J - ENFANTS</td>\n",
" <td>0.009716</td>\n",
" <td>0.971558</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>49</th>\n",
" <td>PASS 3J - ADOS</td>\n",
" <td>0.008643</td>\n",
" <td>0.974225</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>18</th>\n",
" <td>BILLETS NUIT</td>\n",
" <td>0.007333</td>\n",
" <td>0.976487</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>51</th>\n",
" <td>PASS Culture 2023 au 11/09</td>\n",
" <td>0.007092</td>\n",
" <td>0.978675</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>84</th>\n",
" <td>pass culture 11/09</td>\n",
" <td>0.007092</td>\n",
" <td>0.980863</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>83</th>\n",
" <td>pass culture - pass 3j</td>\n",
" <td>0.007076</td>\n",
" <td>0.983046</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>4</th>\n",
" <td>Ados FHU22</td>\n",
" <td>0.006718</td>\n",
" <td>0.985119</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>33</th>\n",
" <td>Enfants FHU22</td>\n",
" <td>0.006631</td>\n",
" <td>0.987165</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
2024-03-23 10:48:47 +01:00
" <tr>\n",
" <th>46</th>\n",
" <td>PARKING CAMPEURS</td>\n",
" <td>0.004781</td>\n",
" <td>0.988640</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>39</th>\n",
" <td>INSCRIPTION NL VOYAGES HUMA</td>\n",
" <td>0.003625</td>\n",
" <td>0.989758</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>17</th>\n",
" <td>BILLET CAMPING-CAR</td>\n",
" <td>0.002594</td>\n",
" <td>0.990559</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>61</th>\n",
" <td>PROSPECT FORUM LOGEMENT 2022</td>\n",
" <td>0.002062</td>\n",
" <td>0.991195</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>27</th>\n",
" <td>Camping-car FHU22</td>\n",
" <td>0.001996</td>\n",
" <td>0.991811</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Camping Zen FHU22</td>\n",
" <td>0.001596</td>\n",
" <td>0.992303</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>66</th>\n",
" <td>RESP. STANDS POUR FHUA2020</td>\n",
" <td>0.001559</td>\n",
" <td>0.992784</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>52</th>\n",
" <td>PRESSE 2021</td>\n",
" <td>0.001559</td>\n",
" <td>0.993265</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>70</th>\n",
" <td>accreditations presse_fhu23</td>\n",
" <td>0.001434</td>\n",
" <td>0.993707</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>0</th>\n",
" <td>ACCREDITES - FHU 23</td>\n",
" <td>0.001434</td>\n",
" <td>0.994150</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>74</th>\n",
" <td>billets enfants 2023_06_05</td>\n",
" <td>0.001239</td>\n",
" <td>0.994532</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>2</th>\n",
" <td>ACHETEURS TIPIS</td>\n",
" <td>0.001023</td>\n",
" <td>0.994848</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>8</th>\n",
" <td>BCOM FHU NORMANDIE 2021</td>\n",
" <td>0.001023</td>\n",
" <td>0.995163</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>88</th>\n",
" <td>prospect forum logement 2023</td>\n",
" <td>0.001014</td>\n",
" <td>0.995476</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>62</th>\n",
" <td>PROSPECT FORUM LOGEMENT 2023</td>\n",
" <td>0.001014</td>\n",
" <td>0.995789</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>38</th>\n",
" <td>INSCRIPTION FORUM LOGEMENT - 14/02</td>\n",
" <td>0.000840</td>\n",
" <td>0.996048</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>57</th>\n",
" <td>PROSPECT FHU NORMANDIE 2021 #2</td>\n",
" <td>0.000802</td>\n",
" <td>0.996296</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>22</th>\n",
" <td>CN - 38e CONGRÈS / 2021</td>\n",
" <td>0.000790</td>\n",
" <td>0.996540</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>65</th>\n",
" <td>RESP. STANDS PCF 2021</td>\n",
" <td>0.000765</td>\n",
" <td>0.996776</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>40</th>\n",
" <td>INSCRITS VISIO 14/01/22</td>\n",
" <td>0.000698</td>\n",
" <td>0.996991</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>24</th>\n",
" <td>CROISIÉRISTES 2021 / RIVAGES DU MONDE</td>\n",
" <td>0.000686</td>\n",
" <td>0.997203</td>\n",
2024-02-25 23:53:10 +01:00
" </tr>\n",
2024-03-23 10:48:47 +01:00
" <tr>\n",
" <th>31</th>\n",
" <td>DINER DE PRESSE 2020</td>\n",
" <td>0.000669</td>\n",
" <td>0.997409</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>71</th>\n",
" <td>acheteurs bcom fhu normandie 2022</td>\n",
" <td>0.000657</td>\n",
" <td>0.997612</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-23 10:48:47 +01:00
" <td>ACHETEURS BCOM FHU NORMANDIE 2022</td>\n",
" <td>0.000657</td>\n",
" <td>0.997814</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>47</th>\n",
" <td>PARKING PMR - PORTE B</td>\n",
" <td>0.000636</td>\n",
" <td>0.998011</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>53</th>\n",
" <td>PROSPECT / TOURISTRA</td>\n",
" <td>0.000628</td>\n",
" <td>0.998204</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>PARKING PMR - PORTE J</td>\n",
" <td>0.000590</td>\n",
" <td>0.998386</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>fichier presse - journalistes2</td>\n",
" <td>0.000578</td>\n",
" <td>0.998565</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>35</th>\n",
" <td>FHU22 - DIFFUSION CP</td>\n",
" <td>0.000570</td>\n",
" <td>0.998740</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
2024-03-23 10:48:47 +01:00
" <tr>\n",
" <th>32</th>\n",
" <td>DINER DE PRESSE 2021</td>\n",
" <td>0.000549</td>\n",
" <td>0.998910</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>81</th>\n",
" <td>liste médias cp</td>\n",
" <td>0.000516</td>\n",
" <td>0.999069</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>67</th>\n",
" <td>SECRETAIRES FÉDÉRAUX 2021</td>\n",
" <td>0.000437</td>\n",
" <td>0.999203</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
2024-03-23 10:48:47 +01:00
" <td>Soirée solidarité Ukraine</td>\n",
" <td>0.000437</td>\n",
" <td>0.999338</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>89</th>\n",
" <td>tipi 2023</td>\n",
" <td>0.000412</td>\n",
" <td>0.999465</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>69</th>\n",
" <td>TIPI 2023</td>\n",
" <td>0.000412</td>\n",
" <td>0.999592</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>77</th>\n",
" <td>fhu22 - pass 3j shotgun</td>\n",
" <td>0.000353</td>\n",
" <td>0.999701</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>30</th>\n",
" <td>DEMANDES PRESSE 2022</td>\n",
" <td>0.000283</td>\n",
" <td>0.999788</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>76</th>\n",
" <td>demandes accréditation 2023</td>\n",
" <td>0.000254</td>\n",
" <td>0.999867</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>23</th>\n",
" <td>COIN DES MÔMES</td>\n",
" <td>0.000170</td>\n",
" <td>0.999919</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>82</th>\n",
" <td>orders_302493 (1)</td>\n",
" <td>0.000108</td>\n",
" <td>0.999953</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" <tr>\n",
2024-03-23 10:48:47 +01:00
" <th>37</th>\n",
" <td>Humacumba Shotgun FHU22</td>\n",
" <td>0.000108</td>\n",
" <td>0.999986</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>liste agences cp</td>\n",
" <td>0.000046</td>\n",
" <td>1.000000</td>\n",
2024-02-28 21:57:28 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-23 10:48:47 +01:00
" target_name customer_id \\\n",
"11 BDS 17/18/19 0.371483 \n",
"28 Cible non-acheteurs franciliens 0.208871 \n",
"34 FHU 2018 - Acheteurs bds 0.184817 \n",
"14 BDS FHU18 0.184505 \n",
"12 BDS 2021 0.183229 \n",
"79 importer_huma 0.183229 \n",
"15 BDS FHU21 0.183224 \n",
"5 BCOM 17/18/19 0.162247 \n",
"16 BDS FHU23 VDéf 0.152298 \n",
"13 BDS 2022 0.139095 \n",
"29 Cible non-acheteurs provinciaux 0.136991 \n",
"7 BCOM 2023 - PASS 3 JOURS 0.106946 \n",
"55 PROSPECT CONTACT -35ANS 0.103483 \n",
"73 billets bds au 11/09 0.077604 \n",
"64 Pass 3J 0.070337 \n",
"9 BCOM FHU18 0.064214 \n",
"6 BCOM 2018 0.059470 \n",
"72 bds 2022 +50 ans 0.052519 \n",
"10 BCOM FHU19 0.045593 \n",
"36 Festivaliers WEB 2020 - plein tarif 0.045410 \n",
"19 CAMPING 0.041178 \n",
"41 Inscriptions newsletters (depuis 2019) 0.039598 \n",
"56 PROSPECT FHU NORMANDIE 2021 #1 0.038925 \n",
"25 Camping FHU22 0.033716 \n",
"3 ACTIVATION BDS 24/08 0.031862 \n",
"75 data bcom 2023 - 15.06.23 0.026594 \n",
"20 CAMPING 2021 #1 0.022154 \n",
"21 CAMPING 24/08 0.022150 \n",
"58 PROSPECT FHU NORMANDIE 2021 #3 0.020579 \n",
"54 PROSPECT ACHETEURS FHU23 POUR FHU NORMANDIE 2023 0.017901 \n",
"85 prospect acheteurs fhu23_pour_fhu rouen 2023 0.017901 \n",
"63 PROSPECT NORMANDIE 0.014118 \n",
"60 PROSPECT FHU NORMANDIE 2022 BDS 0.013777 \n",
"87 prospect bds fhu normandie 0.013777 \n",
"44 PARKING 2021 #1 0.012484 \n",
"45 PARKING 24/08 0.012081 \n",
"42 PARKING - PORTE B 0.011848 \n",
"43 PARKING - PORTE J 0.011736 \n",
"86 prospect bcom fhu normandie 0.010709 \n",
"59 PROSPECT FHU NORMANDIE 2022 BCOM 0.010709 \n",
"50 PASS 3J - ENFANTS 0.009716 \n",
"49 PASS 3J - ADOS 0.008643 \n",
"18 BILLETS NUIT 0.007333 \n",
"51 PASS Culture 2023 au 11/09 0.007092 \n",
"84 pass culture 11/09 0.007092 \n",
"83 pass culture - pass 3j 0.007076 \n",
"4 Ados FHU22 0.006718 \n",
"33 Enfants FHU22 0.006631 \n",
"46 PARKING CAMPEURS 0.004781 \n",
"39 INSCRIPTION NL VOYAGES HUMA 0.003625 \n",
"17 BILLET CAMPING-CAR 0.002594 \n",
"61 PROSPECT FORUM LOGEMENT 2022 0.002062 \n",
"27 Camping-car FHU22 0.001996 \n",
"26 Camping Zen FHU22 0.001596 \n",
"66 RESP. STANDS POUR FHUA2020 0.001559 \n",
"52 PRESSE 2021 0.001559 \n",
"70 accreditations presse_fhu23 0.001434 \n",
"0 ACCREDITES - FHU 23 0.001434 \n",
"74 billets enfants 2023_06_05 0.001239 \n",
"2 ACHETEURS TIPIS 0.001023 \n",
"8 BCOM FHU NORMANDIE 2021 0.001023 \n",
"88 prospect forum logement 2023 0.001014 \n",
"62 PROSPECT FORUM LOGEMENT 2023 0.001014 \n",
"38 INSCRIPTION FORUM LOGEMENT - 14/02 0.000840 \n",
"57 PROSPECT FHU NORMANDIE 2021 #2 0.000802 \n",
"22 CN - 38e CONGRÈS / 2021 0.000790 \n",
"65 RESP. STANDS PCF 2021 0.000765 \n",
"40 INSCRITS VISIO 14/01/22 0.000698 \n",
"24 CROISIÉRISTES 2021 / RIVAGES DU MONDE 0.000686 \n",
"31 DINER DE PRESSE 2020 0.000669 \n",
"71 acheteurs bcom fhu normandie 2022 0.000657 \n",
"1 ACHETEURS BCOM FHU NORMANDIE 2022 0.000657 \n",
"47 PARKING PMR - PORTE B 0.000636 \n",
"53 PROSPECT / TOURISTRA 0.000628 \n",
"48 PARKING PMR - PORTE J 0.000590 \n",
"78 fichier presse - journalistes2 0.000578 \n",
"35 FHU22 - DIFFUSION CP 0.000570 \n",
"32 DINER DE PRESSE 2021 0.000549 \n",
"81 liste médias cp 0.000516 \n",
"67 SECRETAIRES FÉDÉRAUX 2021 0.000437 \n",
"68 Soirée solidarité Ukraine 0.000437 \n",
"89 tipi 2023 0.000412 \n",
"69 TIPI 2023 0.000412 \n",
"77 fhu22 - pass 3j shotgun 0.000353 \n",
"30 DEMANDES PRESSE 2022 0.000283 \n",
"76 demandes accréditation 2023 0.000254 \n",
"23 COIN DES MÔMES 0.000170 \n",
"82 orders_302493 (1) 0.000108 \n",
"37 Humacumba Shotgun FHU22 0.000108 \n",
"80 liste agences cp 0.000046 \n",
2024-02-28 21:57:28 +01:00
"\n",
2024-03-23 10:48:47 +01:00
" cumulative_customers \n",
"11 0.114611 \n",
"28 0.179052 \n",
"34 0.236071 \n",
"14 0.292995 \n",
"12 0.349525 \n",
"79 0.406055 \n",
"15 0.462584 \n",
"5 0.512640 \n",
"16 0.559627 \n",
"13 0.602541 \n",
"29 0.644806 \n",
"7 0.677801 \n",
"55 0.709728 \n",
"73 0.733670 \n",
"64 0.755371 \n",
"9 0.775182 \n",
"6 0.793530 \n",
"72 0.809733 \n",
"10 0.823800 \n",
"36 0.837810 \n",
"19 0.850514 \n",
"41 0.862731 \n",
"56 0.874740 \n",
"25 0.885142 \n",
"3 0.894972 \n",
"75 0.903177 \n",
"20 0.910012 \n",
"21 0.916846 \n",
"58 0.923195 \n",
"54 0.928717 \n",
"85 0.934240 \n",
"63 0.938596 \n",
"60 0.942847 \n",
"87 0.947097 \n",
"44 0.950949 \n",
"45 0.954676 \n",
"42 0.958332 \n",
"43 0.961953 \n",
"86 0.965257 \n",
"59 0.968561 \n",
"50 0.971558 \n",
"49 0.974225 \n",
"18 0.976487 \n",
"51 0.978675 \n",
"84 0.980863 \n",
"83 0.983046 \n",
"4 0.985119 \n",
"33 0.987165 \n",
"46 0.988640 \n",
"39 0.989758 \n",
"17 0.990559 \n",
"61 0.991195 \n",
"27 0.991811 \n",
"26 0.992303 \n",
"66 0.992784 \n",
"52 0.993265 \n",
"70 0.993707 \n",
"0 0.994150 \n",
"74 0.994532 \n",
"2 0.994848 \n",
"8 0.995163 \n",
"88 0.995476 \n",
"62 0.995789 \n",
"38 0.996048 \n",
"57 0.996296 \n",
"22 0.996540 \n",
"65 0.996776 \n",
"40 0.996991 \n",
"24 0.997203 \n",
"31 0.997409 \n",
"71 0.997612 \n",
"1 0.997814 \n",
"47 0.998011 \n",
"53 0.998204 \n",
"48 0.998386 \n",
"78 0.998565 \n",
"35 0.998740 \n",
"32 0.998910 \n",
"81 0.999069 \n",
"67 0.999203 \n",
"68 0.999338 \n",
"89 0.999465 \n",
"69 0.999592 \n",
"77 0.999701 \n",
"30 0.999788 \n",
"76 0.999867 \n",
"23 0.999919 \n",
"82 0.999953 \n",
"37 0.999986 \n",
"80 1.000000 "
2024-02-28 21:57:28 +01:00
]
},
2024-03-23 10:48:47 +01:00
"execution_count": 17,
2024-02-28 21:57:28 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-23 10:48:47 +01:00
"source": [
"print_main_target('14', 100)\n"
]
},
{
"cell_type": "markdown",
"id": "605cced5-052f-4a99-ac26-020c5d2ab633",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## KPI sur tags"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "916c3e2b-04d3-4877-b894-8f26f10d926e",
"metadata": {},
"outputs": [],
"source": [
"customersplus = load_dataset_2(\"4\", \"customersplus\")[['id', 'structure_id']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46847b24-15a4-464e-969f-f16ed3653f1f",
"metadata": {},
"outputs": [],
"source": [
"structure_tag_mappings = load_dataset_2('4', \"structure_tag_mappings\")[['structure_id', 'tag_id']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c10c69d-735f-453e-96bf-750697d965d0",
"metadata": {},
"outputs": [],
"source": [
"customersplus[customersplus['structure_id'].notna()]['structure_id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b0e77b3-5f16-4484-9564-7d3826583418",
"metadata": {},
"outputs": [],
"source": [
"len(customersplus[customersplus['structure_id'].notna()])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfa27722-37f9-435a-8221-8aa6f9a4a107",
"metadata": {},
"outputs": [],
"source": [
"structure_tag_mappings['structure_id'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2daabdd5-31e3-4918-9856-9bbc30cde602",
"metadata": {},
"outputs": [],
"source": [
"def tags_information(tenant_id, first_tags):\n",
"    \"\"\"\n",
"    Print tag-coverage statistics for a tenant and return its most frequent tags.\n",
"\n",
"    Parameters:\n",
"    - tenant_id: str\n",
"        Tenant identifier, passed to load_dataset_2.\n",
"    - first_tags: int\n",
"        Number of rows (most frequent tags) kept in the returned table.\n",
"\n",
"    Returns:\n",
"    - DataFrame\n",
"        One row per (tag_id, tag_name) with the number of customer rows in 'customer_id',\n",
"        sorted by decreasing count and limited to first_tags rows.\n",
"    \"\"\"\n",
"    # rename() returns a new frame, so no column slice is modified in place\n",
"    customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']].rename(columns = {'id' : 'customer_id'})\n",
"    tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']].rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'})\n",
"    structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n",
"\n",
"    # customers -> structures -> tags; rows without a matching tag are removed by the inner join\n",
"    customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n",
"    customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n",
"\n",
"    nb_customers = len(customersplus)\n",
"    nb_customers_with_tag = customer_tags['customer_id'].nunique()\n",
"\n",
"    # Guard both ratios: an empty tenant or a tenant without any tagged customer\n",
"    # would otherwise raise ZeroDivisionError\n",
"    prop_customers_with_tag = nb_customers_with_tag/nb_customers if nb_customers else float('nan')\n",
"    mean_tags_per_customer = len(customer_tags)/nb_customers_with_tag if nb_customers_with_tag else float('nan')\n",
"\n",
"    print('Nombre de client avec tag : ', nb_customers_with_tag)\n",
"    print('Proportion de clients avec tags : ', prop_customers_with_tag)\n",
"    print('Moyenne de tags par client : ', mean_tags_per_customer)\n",
"\n",
"    # Number of customer rows per tag, most frequent first\n",
"    info = (\n",
"        customer_tags\n",
"        .groupby(['tag_id', 'tag_name'])['customer_id']\n",
"        .count()\n",
"        .reset_index()\n",
"        .sort_values('customer_id', ascending = False)\n",
"        .head(first_tags)\n",
"    )\n",
"\n",
"    return info"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b9f5f71-a927-4cc8-bb0c-9538e28d3553",
"metadata": {},
"outputs": [],
"source": [
"tags_information(\"1\", 20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd5bef41-1774-4601-86b5-b7c1aea8f1d2",
"metadata": {},
"outputs": [],
"source": [
"tags_information(\"2\", 20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c2dc3e6-1418-44db-a8c0-4a9d59ec5232",
"metadata": {},
"outputs": [],
"source": [
"load_dataset_2(\"2\", \"tags\")[['id', 'name']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7b2c670-7122-4f67-b1aa-8c80a10f16d8",
"metadata": {},
"outputs": [],
"source": [
"tags_information(\"3\", 20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76639995-252d-4a58-83d8-c0c00900c3a9",
"metadata": {},
"outputs": [],
"source": [
"tags_information(\"4\", 20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07e91791-d4d4-42b1-ac18-22d3b0b9f7bd",
"metadata": {},
"outputs": [],
"source": [
"tags_information(\"101\", 20)"
]
},
{
"cell_type": "markdown",
"id": "87d131cd-ead0-4ef4-a8ee-b09022d08ffa",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## KPI product"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26582be9-cfd1-48ea-a0a7-31101fdeb9d1",
"metadata": {},
"outputs": [],
"source": [
"tenant_id = \"1\"\n",
"\n",
"df_product = display_databases(tenant_id, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"\n",
"df_product.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "533bf499-dd56-4d29-b261-ca1e4928c9c7",
"metadata": {},
"outputs": [],
2024-02-28 21:57:28 +01:00
"source": [
"nb_tickets_per_events = df_product.groupby(['name_event_types', 'name_events'])['ticket_id'].count().reset_index().sort_values('ticket_id', ascending = False)\n",
"nb_tickets_per_events['prop_tickets'] = round(nb_tickets_per_events['ticket_id']/len(df_product), 3)\n",
"nb_tickets_per_events"
]
},
2024-02-25 18:33:24 +01:00
{
"cell_type": "markdown",
"id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901",
2024-03-13 23:24:38 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-02-25 18:33:24 +01:00
"source": [
"# Fin travail 25/02"
]
},
{
"cell_type": "markdown",
"id": "c437eaec",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
2024-01-13 10:38:10 +01:00
"# Exemple sur Company 1"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "a1c1fc39",
2024-01-13 10:38:10 +01:00
"metadata": {},
"source": [
"## Chargement données"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "66f8c17b",
"metadata": {},
2024-01-13 10:38:10 +01:00
"outputs": [],
"source": [
"BUCKET = \"bdc2324-data/1\"\n",
"liste_database = fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "c08e6798",
2024-01-13 10:38:10 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-01-13 10:38:10 +01:00
"source": [
2024-01-13 14:14:11 +01:00
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n",
2024-01-13 10:38:10 +01:00
"\n",
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
"liste_database_filtered = [element for element in liste_database if any(element_part in element for element_part in liste_database_select)]\n",
"\n",
"# Afficher le résultat\n",
"print(liste_database_filtered)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "675f518d",
2024-01-13 10:38:10 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-01-13 10:38:10 +01:00
"source": [
2024-02-04 16:02:01 +01:00
"# loop to create dataframes from liste\n",
"files_path = liste_database\n",
2024-01-13 10:38:10 +01:00
"\n",
"# The tenant number is the second component of the listed paths\n",
"client_number = files_path[0].split(\"/\")[1]\n",
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
"\n",
"# Compiled once; group 3 is the alphabetic table name that follows the numeric prefix of the file name\n",
"file_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
"\n",
"for current_path in files_path:\n",
"    name_match = file_name_pattern.search(current_path)\n",
"    if name_match is None:\n",
"        # Unexpected file name: skip it instead of failing on None.group(3) and aborting the whole load\n",
"        print(\"Skipped (unexpected file name): \", current_path)\n",
"        continue\n",
"    with fs.open(current_path, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in)\n",
"    # the pattern of the name is df1xxx\n",
"    nom_dataframe = df_prefix + name_match.group(3)\n",
"    # The following cells refer to these frames by name (df1_customersplus, df1_tickets, ...)\n",
"    globals()[nom_dataframe] = df"
]
},
2024-02-05 22:04:02 +01:00
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "e855f403",
2024-02-19 23:11:28 +01:00
"metadata": {},
2024-02-05 22:04:02 +01:00
"source": [
"## customersplus.csv"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "91a8f8c4",
2024-02-05 22:04:02 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-05 22:04:02 +01:00
"source": [
"# DataFrame.info() prints its report and returns None, so there is nothing to wrap in a DataFrame\n",
"df1_customersplus.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "2fda171d",
2024-02-05 22:04:02 +01:00
"metadata": {},
"outputs": [],
"source": [
"def info_colonnes_dataframe(df):\n",
"    \"\"\"\n",
"    Summarise every column of a DataFrame: name, dtype and percentage of missing values.\n",
"\n",
"    Parameters:\n",
"    - df: DataFrame\n",
"        The DataFrame to describe.\n",
"\n",
"    Returns:\n",
"    - DataFrame\n",
"        One row per column of df, with columns 'Nom_colonne', 'Type_colonne'\n",
"        (dtype as a string) and 'Taux_NA' (share of missing values, in percent).\n",
"    \"\"\"\n",
"    # One record per column, in the order of df.items()\n",
"    resume_colonnes = [\n",
"        {\n",
"            'Nom_colonne': etiquette,\n",
"            'Type_colonne': str(valeurs.dtype),\n",
"            'Taux_NA': valeurs.isna().mean() * 100,\n",
"        }\n",
"        for etiquette, valeurs in df.items()\n",
"    ]\n",
"\n",
"    return pd.DataFrame(resume_colonnes)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "205eeeab",
2024-02-05 22:04:02 +01:00
"metadata": {},
"outputs": [],
"source": [
"def cleaning_date(df, column_name):\n",
"    \"\"\"\n",
"    Convert one column of a DataFrame to UTC datetimes, in place.\n",
"\n",
"    Parameters:\n",
"    - df: DataFrame\n",
"        The DataFrame holding the column; it is modified in place.\n",
"    - column_name: str\n",
"        Name of the column to convert; its values are parsed with the ISO8601 format.\n",
"\n",
"    Returns:\n",
"    - DataFrame\n",
"        The same df object, with the column converted.\n",
"    \"\"\"\n",
"    parsed_dates = pd.to_datetime(df[column_name], format = 'ISO8601', utc = True)\n",
"    df[column_name] = parsed_dates\n",
"    return df"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "634282c5",
2024-02-05 22:04:02 +01:00
"metadata": {},
"outputs": [],
"source": [
"a = info_colonnes_dataframe(df1_customersplus)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "0e8d4133",
2024-02-05 22:04:02 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-05 22:04:02 +01:00
"source": [
"a"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "1268ad5a",
2024-02-05 22:04:02 +01:00
"metadata": {},
"outputs": [],
"source": [
"a = pd.DataFrame(df1_customersplus.isna().sum()/len(df1_customersplus)*100)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "bd41dc80",
2024-02-05 22:04:02 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-05 22:04:02 +01:00
"source": [
"# Selection des variables\n",
"df1_customersplus_clean = df1_customersplus.copy()\n",
"\n",
"cleaning_date(df1_customersplus_clean, 'first_buying_date')\n",
"cleaning_date(df1_customersplus_clean, 'last_visiting_date')\n",
"\n",
"df1_customersplus_clean.drop(['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'], axis = 1, inplace=True)\n",
"df1_customersplus_clean.rename(columns = {'id' : 'customer_id'}, inplace = True)\n",
"\n"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "64d0f76b",
2024-02-04 16:02:01 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## tickets.csv"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "7e683711",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_tickets"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "e7b9a52e",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_tickets.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "568280e8",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_tickets.isna().sum()/len(df1_tickets)*100"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "29ecec90",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"# Selection des variables\n",
2024-02-05 22:04:02 +01:00
"# drop() must hand back the new frame: with inplace=True it returns None and the rename then fails\n",
"# NOTE(review): the dropped columns and the 'customer_id' label look copied from the customersplus cell - confirm against the tickets schema\n",
"df1_tickets_clean = (\n",
"    df1_tickets\n",
"    .drop(['lastname', 'firstname', 'email', 'created_at', 'updated_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode'], axis = 1)\n",
"    .rename(columns = {'id' : 'customer_id'})\n",
")"
]
},
2024-01-13 10:38:10 +01:00
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "22bb5de4",
2024-02-10 13:23:44 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-01-13 10:38:10 +01:00
"source": [
"## suppliers.csv"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "6a9a91f4",
2024-01-13 10:38:10 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-01-13 10:38:10 +01:00
"source": [
"df1_suppliers"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "bab4758a",
2024-01-13 10:38:10 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-01-13 10:38:10 +01:00
"source": [
"df1_suppliers.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "b5fff251",
2024-01-13 10:38:10 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_suppliers.isna().sum()/len(df1_suppliers)*100"
2024-01-13 10:38:10 +01:00
]
},
2024-01-13 14:14:11 +01:00
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "8b09e2a3",
2024-01-13 14:14:11 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"# Variable selection: keep the identifier and the name only\n",
"# rename() on the selection returns a new frame, so no slice of df1_suppliers is edited in place\n",
"df1_suppliers_clean = df1_suppliers[['id', 'name']].rename(columns = {'name' : 'supplier_name'})"
2024-01-13 10:38:10 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "ecee7cdc",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_suppliers_clean"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "c8e6e69b",
2024-02-04 16:02:01 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## type_ofs.csv"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "1a6cff1f",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_type_ofs"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "93630b41",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_type_ofs.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "4f94481a",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"# Variable selection: keep the identifier, the name and the 'children' column\n",
"# rename() on the selection returns a new frame, so no slice of df1_type_ofs is edited in place\n",
"df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']].rename(columns = {'name' : 'type_of_ticket_name'})"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "1b2811e2",
2024-02-04 16:02:01 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## purchases.csv"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "2455d2e1",
"metadata": {
"scrolled": true
},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_purchases"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "5f9a159d",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_purchases.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "db201bf7",
"metadata": {},
"outputs": [],
"source": [
"# purchase_date cleaning: one pass, parsed as ISO8601 and converted to UTC\n",
"# (a second to_datetime on the already-parsed column had no effect)\n",
"df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], utc = True, format = 'ISO8601')"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "bd436fca",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_purchases.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "83435862",
"metadata": {},
"outputs": [],
"source": [
"# Selection des variables\n",
"df1_purchases_clean = df1_purchases[['id', 'purchase_date', 'customer_id']]"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "f210e730",
2024-02-10 13:23:44 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Fusion de l'ensemble des données billétiques"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "1f8b3aa7",
"metadata": {},
"outputs": [],
"source": [
"# Fusion avec fournisseurs\n",
"df1_ticket_information = pd.merge(df1_tickets_clean, df1_suppliers_clean, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
"df1_ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
"\n",
"# Fusion avec type de tickets\n",
"df1_ticket_information = pd.merge(df1_ticket_information, df1_type_ofs_clean, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
"df1_ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
"\n",
"# Fusion avec achats\n",
"df1_ticket_information = pd.merge(df1_ticket_information, df1_purchases_clean, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
"df1_ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "83a4d021",
"metadata": {
"scrolled": true
},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_ticket_information"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "56e6ebd1",
2024-02-10 13:23:44 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Utilisation de fonctions"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "88fcde4b",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
2024-02-04 16:02:01 +01:00
"# Créer un DataFrame exemple\n",
"df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n",
"\n",
"# Appliquer la fonction pour nettoyer la colonne 'opened_at' de manière vectorisée\n",
"df_clean = cleaning_date(df_not_clean, 'opened_at')\n",
"df_clean.rename(columns = {'opened_at' : 'opened_at_clean'}, inplace = True)\n",
"\n",
"test = pd.concat([df1_campaign_stats[['opened_at']].head(20), df_clean], axis=1)\n",
"\n",
"test.info()"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "818f69db",
2024-02-04 16:02:01 +01:00
"metadata": {},
"source": [
"## Nettoyage, selection et fusion"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "c9654eda",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
"source": [
"df1_ticket_information"
]
},
2024-02-04 16:02:01 +01:00
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "7f2b620c",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
"df1_ticket_information.info()"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "637bdb72",
"metadata": {},
"source": [
2024-02-04 16:02:01 +01:00
"# Customer information"
]
},
{
2024-02-04 16:02:01 +01:00
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "14c52894",
2024-03-13 23:24:38 +01:00
"metadata": {},
"source": [
2024-02-04 16:02:01 +01:00
"## Target area"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "d83abfbf",
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
"source": [
2024-02-04 16:02:01 +01:00
"# targets.csv cleaning (rename() on the selection returns a new frame instead of editing a slice in place)\n",
"df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]].rename(columns = {'id' : 'target_id' , 'name' : 'target_name'})\n",
"\n",
"# target_types cleaning: every kept column gets the 'target_type_' prefix\n",
"df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
"\n",
"# customer_target_mappings cleaning\n",
"df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
"\n",
"# Attach its type to every target, then drop the join key\n",
"df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, on = 'target_type_id', how = 'inner').drop(columns = ['target_type_id'])\n",
"\n",
"# Attach the target information to every customer/target mapping, then drop the join key\n",
"df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, on = 'target_id', how = 'inner').drop(columns = ['target_id'])"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "90d71b2c",
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
"# Number of target mappings per customer\n",
"df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n",
"\n",
"# Share of customers mapped to more than one target; printed explicitly because a cell only\n",
"# displays its last expression and this value was previously computed and discarded\n",
"print('Proportion de clients avec plusieurs cibles : ', len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test))\n",
"\n",
"# 99.6% of the 151,000 targeted customers are categorised several times, about 5 times on average...\n",
"df1_targets_test.mean()\n"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "2301de1e",
2024-02-07 23:28:55 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
2024-02-07 23:28:55 +01:00
"df1_targets_full.head()"
2024-02-04 16:02:01 +01:00
]
},
{
2024-02-07 23:28:55 +01:00
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "75fbc2f7",
2024-02-07 23:28:55 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
2024-02-07 23:28:55 +01:00
"# Catégorisation des target_name\n",
"import pandas as pd\n",
"import nltk\n",
"from nltk.tokenize import word_tokenize\n",
"from nltk.corpus import stopwords\n",
"from nltk.stem import WordNetLemmatizer\n",
"from nltk.probability import FreqDist\n",
"\n",
"# Téléchargement des ressources nécessaires\n",
"nltk.download('punkt')\n",
"nltk.download('stopwords')\n",
"nltk.download('wordnet')\n",
"\n"
2024-02-04 16:02:01 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "55cddf92",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
2024-02-07 23:28:55 +01:00
"# Tokenisation, stop-word removal and lemmatisation\n",
"def preprocess_text(texte):\n",
"    \"\"\"\n",
"    Turn a text into a list of lower-cased, stop-word-free, lemmatised tokens.\n",
"\n",
"    Parameters:\n",
"    - texte: str or iterable of str\n",
"        A single text, or several pieces of text that are joined with spaces.\n",
"        None or a float (e.g. NaN for a missing value) gives an empty list.\n",
"\n",
"    Returns:\n",
"    - list of str\n",
"        The remaining tokens, in order of appearance.\n",
"    \"\"\"\n",
"    # Missing values cannot be tokenised\n",
"    if texte is None or isinstance(texte, float):\n",
"        return []\n",
"\n",
"    # A str is already one text: ' '.join() on it would insert a space between every\n",
"    # character and the tokeniser would then return single letters\n",
"    texte_concat = texte if isinstance(texte, str) else ' '.join(texte)\n",
"\n",
"    # Word tokenisation on the lower-cased text\n",
"    tokens = word_tokenize(texte_concat.lower())\n",
"\n",
"    # Remove French stop words\n",
"    stop_words = set(stopwords.words('french'))\n",
"    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
"\n",
"    # Lemmatisation\n",
"    # NOTE(review): WordNetLemmatizer is backed by WordNet, an English lexical database - confirm it has the intended effect on French text\n",
"    lemmatizer = WordNetLemmatizer()\n",
"    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
"\n",
"    return lemmatized_tokens\n",
2024-02-04 16:02:01 +01:00
"\n",
"\n",
2024-02-07 23:28:55 +01:00
"# Appliquer le prétraitement à la colonne de texte\n",
"df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n",
"\n",
"# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n",
"all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n",
"\n",
"# Calculer la fréquence des mots\n",
"freq_dist = FreqDist(all_words)\n",
"\n",
"\n"
2024-02-04 16:02:01 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "7fd98a85",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
2024-02-07 23:28:55 +01:00
"# Affichage des mots les plus fréquents\n",
"print(\"Mots les plus fréquents:\")\n",
"for mot, freq in freq_dist.most_common(15):\n",
" print(f\"{mot}: {freq}\")"
2024-02-04 16:02:01 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "cf94bb1d",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-07 23:28:55 +01:00
"source": [
"import pandas as pd\n",
"import nltk\n",
"from nltk.tokenize import word_tokenize\n",
"from nltk.corpus import stopwords\n",
"from nltk.stem import WordNetLemmatizer\n",
"\n",
"# Téléchargement des ressources nécessaires\n",
"nltk.download('punkt')\n",
"nltk.download('stopwords')\n",
"nltk.download('wordnet')\n",
"\n",
"# Création de la DataFrame d'exemple\n",
"data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n",
"df = pd.DataFrame(data)\n",
"\n",
"# Text preprocessing function\n",
"def preprocess_text(texte):\n",
"    \"\"\"\n",
"    Turn a text into a list of lower-cased, stop-word-free, lemmatised tokens.\n",
"\n",
"    Parameters:\n",
"    - texte: str or iterable of str\n",
"        A single text, or several pieces of text that are joined with spaces.\n",
"        None or a float (e.g. NaN for a missing value) gives an empty list.\n",
"\n",
"    Returns:\n",
"    - list of str\n",
"        The remaining tokens, in order of appearance.\n",
"    \"\"\"\n",
"    # Missing values cannot be tokenised\n",
"    if texte is None or isinstance(texte, float):\n",
"        return []\n",
"\n",
"    # A str is already one text: ' '.join() on it would insert a space between every\n",
"    # character and the tokeniser would then return single letters\n",
"    texte_concat = texte if isinstance(texte, str) else ' '.join(texte)\n",
"\n",
"    # Word tokenisation on the lower-cased text\n",
"    tokens = word_tokenize(texte_concat.lower())\n",
"\n",
"    # Remove French stop words\n",
"    stop_words = set(stopwords.words('french'))\n",
"    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
"\n",
"    # Lemmatisation\n",
"    # NOTE(review): WordNetLemmatizer is backed by WordNet, an English lexical database - confirm it has the intended effect on French text\n",
"    lemmatizer = WordNetLemmatizer()\n",
"    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
"\n",
"    return lemmatized_tokens\n",
"\n",
2024-02-10 22:46:56 +01:00
"# Appliquer la fonction de prétraitement à la colonne de texte\n",
"df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n",
"\n",
"# Afficher le résultat\n",
"print(df)\n"
]
},
{
"cell_type": "markdown",
"id": "711d3884",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Campaign area"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "c25b5295",
"metadata": {},
"outputs": [],
"source": [
"# campaign_stats cleaning\n",
"# copy() gives cleaning_date an independent frame to modify, not a column slice of df1_campaign_stats\n",
"df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]].copy()\n",
"for date_column in ['opened_at', 'sent_at', 'delivered_at']:\n",
"    cleaning_date(df1_campaign_stats_clean, date_column)\n",
"\n",
"# campaigns cleaning: add_prefix() already returns a new frame, every column gets the 'campaign_' prefix\n",
"df1_campaigns_clean = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
"cleaning_date(df1_campaigns_clean, 'campaign_sent_at')\n",
"\n",
"# Merge: attach campaign information to every campaign statistic, then drop the join key\n",
"df1_campaigns_full = pd.merge(df1_campaign_stats_clean, df1_campaigns_clean, on = \"campaign_id\", how = \"left\").drop(columns = ['campaign_id'])"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "2a3de6a5",
"metadata": {},
"outputs": [],
"source": [
"df1_campaigns_full.info()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "3fc1f446",
"metadata": {},
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
"df1_campaigns_information"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "20e69ee3",
2024-02-04 16:02:01 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Link area"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "d9cbdbce",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
"df1_campaigns"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "c07459f0",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-02-10 22:46:56 +01:00
"outputs": [],
2024-02-04 16:02:01 +01:00
"source": [
"df1_link_stats"
]
},
{
"cell_type": "markdown",
2024-02-10 22:46:56 +01:00
"id": "80ae4c42",
2024-03-04 23:30:25 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-02-04 16:02:01 +01:00
"source": [
2024-03-04 23:30:25 +01:00
"## Supplier"
2024-02-04 16:02:01 +01:00
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "b50b8f95",
2024-02-04 16:02:01 +01:00
"metadata": {},
"outputs": [],
"source": [
"def suppliers_exploration(suppliers = None):\n",
"    \"\"\"Describe a suppliers table (suppliers.csv) in a one-row DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    suppliers : pandas.DataFrame\n",
"        Table with at least the columns 'name', 'label', 'itr' and 'commission'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        A single row holding 'nb_suppliers' (number of distinct names) and the\n",
"        percentage of missing values of 'label', 'itr' and 'commission' in the\n",
"        columns 'label_na', 'itr_na' and 'commission_na'.\n",
"    \"\"\"\n",
"    n_rows = len(suppliers)\n",
"\n",
"    # Share of NaN, in percent, for each column whose content is not well known\n",
"    na_rates = {\n",
"        column + '_na': [suppliers[column].isna().sum() / n_rows * 100]\n",
"        for column in ('label', 'itr', 'commission')\n",
"    }\n",
"\n",
"    return pd.DataFrame({'nb_suppliers': [suppliers['name'].nunique()], **na_rates})"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "7e292935",
2024-02-04 16:02:01 +01:00
"metadata": {},
"outputs": [],
"source": [
"df1_suppliers_desc = suppliers_exploration(suppliers = df1_suppliers)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "05b6f2b0",
2024-02-04 16:02:01 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
"source": [
"df1_suppliers_desc"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "c9324d80",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"bdc2324-data\"\n",
"\n",
"# First-level entries of the bucket (presumably one folder per company)\n",
"liste_folders = fs.ls(BUCKET)\n",
"\n",
"# Flat list of everything found directly inside each of those folders\n",
"liste_files = [\n",
"    file_path\n",
"    for company_folder in liste_folders\n",
"    for file_path in fs.ls(company_folder)\n",
"]"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "10304058",
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
"source": [
"liste_database_select = ['suppliers']\n",
"\n",
"# Keep the paths that contain at least one of the selected database names\n",
"liste_suppliers = [\n",
"    file_path\n",
"    for file_path in liste_files\n",
"    if any(database_name in file_path for database_name in liste_database_select)\n",
"]\n",
"\n",
"# Show the selected paths\n",
"print(liste_suppliers)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "ffa423e5",
"metadata": {},
"outputs": [],
"source": [
"def database_loading(database_name = None):\n",
"    \"\"\"Read one CSV file from the S3 filesystem and extract its tenant identifier.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    database_name : str\n",
"        Path of the CSV file on the ``fs`` filesystem. The tenant identifier is\n",
"        the second '/'-separated component of this path (presumably\n",
"        '<bucket>/<tenant>/<file>' - confirm against the bucket layout).\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(df, client_number)``: the DataFrame read from the file and the\n",
"        tenant identifier as a string.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``database_name`` is not provided.\n",
"    \"\"\"\n",
"    if database_name is None:\n",
"        raise ValueError('database_name must be the path of the file to load')\n",
"\n",
"    # Second component of the path identifies the tenant\n",
"    client_number = database_name.split(\"/\")[1]\n",
"\n",
"    with fs.open(database_name, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in)\n",
"\n",
"    return df, client_number"
]
},
{
"cell_type": "code",
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "70bdc88d",
2024-02-10 13:23:44 +01:00
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "6a0f567d",
2024-02-10 13:23:44 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Collect one frame per suppliers file and concatenate once at the end:\n",
"# calling pd.concat inside the loop re-copies the accumulated frame each time.\n",
"suppliers_frames = []\n",
"\n",
"for link in liste_suppliers:\n",
"    df_supplier, tenant_id = database_loading(link)\n",
"\n",
"    # Tag every row with the tenant the file belongs to\n",
"    df_supplier['tenant_id'] = int(tenant_id)\n",
"\n",
"    suppliers_frames.append(df_supplier)\n",
"\n",
"# pd.concat raises on an empty list, so fall back to an empty frame when no file matched\n",
"df_all = pd.concat(suppliers_frames, axis = 0) if suppliers_frames else pd.DataFrame()"
]
2024-02-10 13:23:44 +01:00
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "1522d8cd",
2024-02-10 13:23:44 +01:00
"metadata": {},
"outputs": [],
"source": [
"# df_all[df_all['tenant_id'] == 101]['name'].unique()"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "b0e42a61",
2024-02-10 13:23:44 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Keywords hinting at an online sales channel ('vad' = 'vente à distance', i.e. distance selling)\n",
"# NOTE(review): 'net' is a substring of 'internet' and of unrelated words, so it may over-match - confirm it is intended.\n",
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online']\n",
"\n",
"# Missing names become empty strings so the match below yields a boolean for every row\n",
"df_all['name'] = df_all['name'].fillna('')\n",
"\n",
"# Case-insensitive search for any of the keywords, stored as a 0/1 flag\n",
"motif_mots = '|'.join(liste_mots)\n",
"nom_contient_mot = df_all['name'].str.contains(motif_mots, case=False)\n",
"df_all['canal_vente_internet'] = nom_contient_mot.astype(int)"
]
},
{
"cell_type": "code",
2024-03-23 10:48:47 +01:00
"execution_count": null,
2024-02-10 22:46:56 +01:00
"id": "d299ae91",
2024-02-10 13:23:44 +01:00
"metadata": {},
2024-03-23 10:48:47 +01:00
"outputs": [],
2024-02-10 13:23:44 +01:00
"source": [
"df_all.groupby('tenant_id')['canal_vente_internet'].max()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-02-19 23:11:28 +01:00
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}