6008 lines
208 KiB
Plaintext
6008 lines
208 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "5bf5c226",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Business Data Challenge - Team 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "b1a5b9d3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"import s3fs\n",
|
|
"import re\n",
|
|
"import warnings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ecfa2219",
|
|
"metadata": {},
|
|
"source": [
|
|
"Configuration de l'accès aux données"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "1a094277",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create filesystem object\n",
|
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "30d77451-2df6-4c07-8b15-66e0e990ff03",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create filesystem object\n",
|
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
|
"\n",
|
|
"\n",
|
|
"# Import cleaning and merge functions\n",
|
|
"\n",
|
|
"exec(open('0_Cleaning_and_merge_functions.py').read())\n",
|
|
"\n",
|
|
"exec(open('0_KPI_functions.py').read())\n",
|
|
"\n",
|
|
"# Ignore warning\n",
|
|
"warnings.filterwarnings('ignore')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def load_dataset_2(directory_path, file_name):\n",
|
|
" \"\"\"\n",
|
|
" This function loads csv file\n",
|
|
" \"\"\"\n",
|
|
" file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n",
|
|
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
|
" df = pd.read_csv(file_in, sep=\",\")\n",
|
|
"\n",
|
|
" # drop na :\n",
|
|
" #df = df.dropna(axis=1, thresh=len(df))\n",
|
|
" # if identifier in table : delete it\n",
|
|
" if 'identifier' in df.columns:\n",
|
|
" df = df.drop(columns = 'identifier')\n",
|
|
" return df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "31ab76f0-fbb1-46f6-b359-97228620c207",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def export_in_temporary(df, output_name):\n",
|
|
" print('Export of dataset :', output_name)\n",
|
|
" FILE_PATH_OUT_S3 = \"projet-bdc2324-team1/Temporary\" + \"/\" + output_name + '.csv'\n",
|
|
" with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
|
|
" df.to_csv(file_out, index = False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ccf597b0-b459-4ea5-baf0-5ba8c90915e4",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Cleaning target area and tags"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "28316e1d-7892-4506-9d53-0695e71aa7bc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"target_example = preprocessing_target_area('1')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "fd88e294-e038-4cec-ad94-2bbbc10a4059",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_jeune</th>\n",
|
|
" <th>target_optin</th>\n",
|
|
" <th>target_optout</th>\n",
|
|
" <th>target_scolaire</th>\n",
|
|
" <th>target_entreprise</th>\n",
|
|
" <th>target_famille</th>\n",
|
|
" <th>target_newsletter</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_jeune target_optin target_optout target_scolaire \\\n",
|
|
"customer_id \n",
|
|
"1 1 1 1 1 \n",
|
|
"2 1 1 1 1 \n",
|
|
"3 1 1 0 0 \n",
|
|
"4 1 1 0 0 \n",
|
|
"5 1 1 0 0 \n",
|
|
"\n",
|
|
" target_entreprise target_famille target_newsletter \n",
|
|
"customer_id \n",
|
|
"1 1 0 0 \n",
|
|
"2 1 0 1 \n",
|
|
"3 0 0 1 \n",
|
|
"4 0 0 0 \n",
|
|
"5 0 0 0 "
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"tenant_id = '1'\n",
|
|
"\n",
|
|
"def concatenate_names(names):\n",
|
|
" return ', '.join(names)\n",
|
|
" \n",
|
|
"target_example =display_input_databases(tenant_id, \"target_information\")\n",
|
|
"\n",
|
|
"target_example['target_name'] = target_example['target_name'].fillna('').str.lower()\n",
|
|
"\n",
|
|
"\n",
|
|
"target_example['target_jeune'] = target_example['target_name'].str.contains('|'.join(['jeune', 'pass_culture', 'etudiant', '12-25 ans', 'student', 'jeunesse']), case=False).astype(int)\n",
|
|
"target_example['target_optin'] = target_example['target_name'].str.contains('|'.join(['optin' ,'opt-in']), case=False).astype(int)\n",
|
|
"target_example['target_optout'] = target_example['target_name'].str.contains('|'.join(['optout', 'unsubscribed']), case=False).astype(int)\n",
|
|
"target_example['target_scolaire'] = target_example['target_name'].str.contains('|'.join(['scolaire' , 'enseignant', 'chercheur', 'schulen', 'école']), case=False).astype(int)\n",
|
|
"target_example['target_entreprise'] = target_example['target_name'].str.contains('|'.join(['b2b', 'btob', 'cse']), case=False).astype(int)\n",
|
|
"target_example['target_famille'] = target_example['target_name'].str.contains('|'.join(['famille', 'enfants', 'family']), case=False).astype(int)\n",
|
|
"target_example['target_newsletter'] = target_example['target_name'].str.contains('|'.join(['nl', 'newsletter']), case=False).astype(int)\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"target_agg = target_example.groupby('customer_id').agg(\n",
|
|
" nb_targets=('target_name', 'nunique'), # Utilisation de tuples pour spécifier les noms de colonnes\n",
|
|
" all_targets=('target_name', concatenate_names),\n",
|
|
" all_target_types=('target_type_name', concatenate_names)\n",
|
|
" ).reset_index()\n",
|
|
"\n",
|
|
"target_example_categorie = target_example.groupby('customer_id')[['target_jeune', 'target_optin', 'target_optout', 'target_scolaire', 'target_entreprise', 'target_famille', 'target_newsletter']].max()\n",
|
|
"\n",
|
|
"target_example_categorie.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c75efea3-b5e8-4a7a-bed4-dd64ae9ff9f2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#export_in_temporary(target_agg, 'Target_kpi_concatenate')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "5d91263e-8a97-4cb1-8d94-db8ab0b77cdf",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"# Brouillon"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c5e864b1-adad-4267-b956-3f7ef371d677",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"def display_covering_time(df, company, datecover):\n",
|
|
" \"\"\"\n",
|
|
" This function draws the time coverage of each company\n",
|
|
" \"\"\"\n",
|
|
" min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
|
|
" max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
|
|
" datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
|
|
" print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
|
|
" return datecover\n",
|
|
"\n",
|
|
"\n",
|
|
"def compute_time_intersection(datecover):\n",
|
|
" \"\"\"\n",
|
|
" This function returns the time coverage for all companies\n",
|
|
" \"\"\"\n",
|
|
" timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
|
|
" intersection = set.intersection(*timestamps_sets)\n",
|
|
" intersection_list = list(intersection)\n",
|
|
" formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
|
|
" return sorted(formated_dates)\n",
|
|
"\n",
|
|
"\n",
|
|
"def df_coverage_modelization(sport, coverage_features = 0.7):\n",
|
|
" \"\"\"\n",
|
|
" This function returns start_date, end_of_features and final dates\n",
|
|
" that help to construct train and test datasets\n",
|
|
" \"\"\"\n",
|
|
" datecover = {}\n",
|
|
" for company in sport:\n",
|
|
" df_products_purchased_reduced = display_input_databases(company, file_name = \"products_purchased_reduced\",\n",
|
|
" datetime_col = ['purchase_date'])\n",
|
|
" datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
|
|
" #print(datecover.keys())\n",
|
|
" dt_coverage = compute_time_intersection(datecover)\n",
|
|
" start_date = dt_coverage[0]\n",
|
|
" end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n",
|
|
" final_date = dt_coverage[-1]\n",
|
|
" return start_date, end_of_features, final_date\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"# Bases non communes : mise à plat"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"companies = {'musee' : ['1', '2', '3', '4'],\n",
|
|
" 'sport': ['5', '6', '7', '8', '9'],\n",
|
|
" 'musique' : ['10', '11', '12', '13', '14']}\n",
|
|
"\n",
|
|
"all_companies = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "35ac004f-c191-4f45-a4b1-6d993d9ec38c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"companies_databases = pd.DataFrame()\n",
|
|
"\n",
|
|
"for i in all_companies:\n",
|
|
" company_databases = pd.DataFrame({'company_number' : [i]})\n",
|
|
"\n",
|
|
" BUCKET = \"bdc2324-data/\"+i\n",
|
|
" for base in fs.ls(BUCKET):\n",
|
|
" match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
|
|
" if match:\n",
|
|
" nom_base = match.group(3)\n",
|
|
" company_databases[nom_base] = 1\n",
|
|
"\n",
|
|
" companies_databases = pd.concat([companies_databases, company_databases])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8986e477-e6c5-4d6c-83b2-2c90c134b599",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option(\"display.max_columns\", None)\n",
|
|
"companies_databases\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8fecc3bb-4c03-4144-97c5-615224d9729e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.reset_option(\"display.max_columns\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0294ce71-840e-458b-8ffa-cadabbc6da21",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Debut Travail 25/02"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## Base communes au types Musée"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dbce1124-9a22-4502-a47a-fc3d0e2db70b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"companies['musee']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5080f66e-f779-410a-876d-b4fe2795e17e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"for i in companies['musique']:\n",
|
|
" BUCKET = \"bdc2324-data/\"+i\n",
|
|
" liste_base = []\n",
|
|
" for base in fs.ls(BUCKET):\n",
|
|
" match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
|
|
" if match:\n",
|
|
" nom_base = match.group(3)\n",
|
|
" liste_base.append(nom_base)\n",
|
|
" globals()['base_'+i] = liste_base\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "abd477e1-7479-4c88-a5aa-f987af3f5b79",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Trouver l'intersection entre les cinq listes\n",
|
|
"intersection = set(base_1).intersection(base_2, base_3, base_4, base_101)\n",
|
|
"\n",
|
|
"# Convertir le résultat en liste si nécessaire\n",
|
|
"intersection_liste = list(intersection)\n",
|
|
"\n",
|
|
"print(intersection_liste)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Trouver l'intersection entre les cinq listes\n",
|
|
"intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n",
|
|
"\n",
|
|
"# Convertir le résultat en liste si nécessaire\n",
|
|
"intersection_liste = list(intersection)\n",
|
|
"\n",
|
|
"print(intersection_liste)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"len(intersection_liste)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7d058b21-a538-4f59-aefb-ef7966f73fdc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_tags = load_dataset_2(\"1\", \"tags\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "aa441f99-733c-4675-8676-bed4682d3324",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6767a750-14a4-4c05-903e-d2f07170825b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_customersplus = load_dataset_2(\"1\", \"customersplus\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "125e9145-a815-46fd-bdf4-07589508b259",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c17a6976-792f-474d-bcff-c89396eddb3f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ecfc155a-cb42-46ec-8da5-33fdcd087355",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"len(df1_structure_tag_mappings)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "071410b8-950d-4fcc-b2b9-57415253c286",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"?np.sort_values()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def info_colonnes_dataframe(df):\n",
|
|
" # Créer une liste pour stocker les informations sur chaque colonne\n",
|
|
" infos_colonnes = []\n",
|
|
"\n",
|
|
" # Parcourir les colonnes du DataFrame\n",
|
|
" for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n",
|
|
" # Calculer le taux de valeurs manquantes\n",
|
|
" taux_na = serie.isna().mean() * 100\n",
|
|
"\n",
|
|
" # Ajouter les informations à la liste\n",
|
|
" infos_colonnes.append({\n",
|
|
" 'Nom_colonne': nom_colonne,\n",
|
|
" 'Type_colonne': str(serie.dtype),\n",
|
|
" 'Taux_NA': taux_na\n",
|
|
" })\n",
|
|
"\n",
|
|
" # Créer une nouvelle DataFrame à partir de la liste d'informations\n",
|
|
" df_infos_colonnes = pd.DataFrame(infos_colonnes)\n",
|
|
"\n",
|
|
" return df_infos_colonnes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"info_colonnes_dataframe(df1_tags)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"info_colonnes_dataframe(df1_structure_tag_mappings)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option('display.max_colwidth', None)\n",
|
|
"\n",
|
|
"print(df1_tags['name'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a59bf932-5b54-4600-81f5-c55ac93ae510",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option('display.max_rows', None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(df1_tags['name'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "76bffba1-5f7e-4308-9224-437ca66148f8",
|
|
"metadata": {},
|
|
"source": [
|
|
"## KPI sur target_type"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f6daf22e-6583-4431-a467-660a1dd4e5a4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "d91d5895",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option('display.max_colwidth', None)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c58b17d3",
|
|
"metadata": {},
|
|
"source": [
|
|
"Raisonnement : on prends les target_type qui représente 90% des clients et on fait des catégories dessus."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "6930bff5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def print_main_target(tenant_id, nb_print = 40):\n",
|
|
" df_target = display_input_databases(tenant_id, \"target_information\")\n",
|
|
"\n",
|
|
" print('Nombre de ciblage : ', len(df_target))\n",
|
|
" nb_customers = df_target['customer_id'].nunique()\n",
|
|
" print('Nombre de client avec étiquette target : ', nb_customers) \n",
|
|
"\n",
|
|
" nb_custumers_per_target = df_target.groupby(\"target_name\")['customer_id'].count().reset_index().sort_values('customer_id', ascending=False)\n",
|
|
" nb_custumers_per_target['cumulative_customers'] = nb_custumers_per_target['customer_id'].cumsum()/len(df_target)\n",
|
|
" nb_custumers_per_target['customer_id'] = nb_custumers_per_target['customer_id']/nb_customers\n",
|
|
"\n",
|
|
" return nb_custumers_per_target.head(nb_print)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "08488e43-56e0-461b-8770-c4e68d5c09f4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option('display.max_rows', None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "1e7ee1a0",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n",
|
|
"Nombre de ciblage : 768024\n",
|
|
"Nombre de client avec étiquette target : 151159\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_name</th>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th>cumulative_customers</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>161</th>\n",
|
|
" <td>consentement optin mediation specialisee</td>\n",
|
|
" <td>0.992333</td>\n",
|
|
" <td>0.195306</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>160</th>\n",
|
|
" <td>consentement optin jeune public</td>\n",
|
|
" <td>0.992194</td>\n",
|
|
" <td>0.390585</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>158</th>\n",
|
|
" <td>consentement optin b2c</td>\n",
|
|
" <td>0.720493</td>\n",
|
|
" <td>0.532390</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Arenametrix_bascule tel vers sib</td>\n",
|
|
" <td>0.232973</td>\n",
|
|
" <td>0.578242</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>165</th>\n",
|
|
" <td>consentement optout b2c</td>\n",
|
|
" <td>0.228389</td>\n",
|
|
" <td>0.623193</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>COM Inscrits NL générale (historique)</td>\n",
|
|
" <td>0.152191</td>\n",
|
|
" <td>0.653146</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>162</th>\n",
|
|
" <td>consentement optin newsletter generale</td>\n",
|
|
" <td>0.146171</td>\n",
|
|
" <td>0.681915</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>169</th>\n",
|
|
" <td>consentement optout newsletter generale</td>\n",
|
|
" <td>0.124736</td>\n",
|
|
" <td>0.706465</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>170</th>\n",
|
|
" <td>consentement optout scolaires</td>\n",
|
|
" <td>0.104155</td>\n",
|
|
" <td>0.726964</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>166</th>\n",
|
|
" <td>consentement optout dre</td>\n",
|
|
" <td>0.094788</td>\n",
|
|
" <td>0.745620</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>164</th>\n",
|
|
" <td>consentement optout b2b</td>\n",
|
|
" <td>0.094067</td>\n",
|
|
" <td>0.764134</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>126</th>\n",
|
|
" <td>Inscrits NL générale (export_291019 + operation_videomaton)</td>\n",
|
|
" <td>0.093187</td>\n",
|
|
" <td>0.782474</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>157</th>\n",
|
|
" <td>consentement optin b2b</td>\n",
|
|
" <td>0.084249</td>\n",
|
|
" <td>0.799056</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>216</th>\n",
|
|
" <td>ddcp_visiteurs dps 010622</td>\n",
|
|
" <td>0.081735</td>\n",
|
|
" <td>0.815142</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>Contacts_prenomsdoubles</td>\n",
|
|
" <td>0.077025</td>\n",
|
|
" <td>0.830302</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>115</th>\n",
|
|
" <td>FORMATION _ acheteurs optin last year</td>\n",
|
|
" <td>0.069364</td>\n",
|
|
" <td>0.843954</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>214</th>\n",
|
|
" <td>ddcp_promo_visiteurs occasionnels_musee_8mois</td>\n",
|
|
" <td>0.043927</td>\n",
|
|
" <td>0.852600</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>189</th>\n",
|
|
" <td>ddcp_promo_md_musée_dps 011019</td>\n",
|
|
" <td>0.039759</td>\n",
|
|
" <td>0.860425</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>188</th>\n",
|
|
" <td>ddcp_promo_MD_billet_musée_oct_2019_agarder2</td>\n",
|
|
" <td>0.036266</td>\n",
|
|
" <td>0.867563</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>163</th>\n",
|
|
" <td>consentement optin scolaires</td>\n",
|
|
" <td>0.032079</td>\n",
|
|
" <td>0.873876</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>159</th>\n",
|
|
" <td>consentement optin dre</td>\n",
|
|
" <td>0.029949</td>\n",
|
|
" <td>0.879771</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>34</th>\n",
|
|
" <td>DDCP Newsletter enseignants</td>\n",
|
|
" <td>0.029836</td>\n",
|
|
" <td>0.885643</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>36</th>\n",
|
|
" <td>DDCP Newsletter jeune public</td>\n",
|
|
" <td>0.025549</td>\n",
|
|
" <td>0.890671</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>127</th>\n",
|
|
" <td>Inscrits NL générale site web</td>\n",
|
|
" <td>0.024689</td>\n",
|
|
" <td>0.895531</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>145</th>\n",
|
|
" <td>Votre première liste</td>\n",
|
|
" <td>0.024577</td>\n",
|
|
" <td>0.900368</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>61</th>\n",
|
|
" <td>DDCP billets famille</td>\n",
|
|
" <td>0.023876</td>\n",
|
|
" <td>0.905067</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>106</th>\n",
|
|
" <td>DRE MucemLab</td>\n",
|
|
" <td>0.015229</td>\n",
|
|
" <td>0.908064</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>39</th>\n",
|
|
" <td>DDCP Newsletter relais champ social</td>\n",
|
|
" <td>0.015017</td>\n",
|
|
" <td>0.911020</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>110</th>\n",
|
|
" <td>DRE institutionnels</td>\n",
|
|
" <td>0.014746</td>\n",
|
|
" <td>0.913922</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>48</th>\n",
|
|
" <td>DDCP PROMO Participants ateliers (adultes et enfants)</td>\n",
|
|
" <td>0.012927</td>\n",
|
|
" <td>0.916466</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>74</th>\n",
|
|
" <td>DDCP promo Plan B 2019 (concerts)</td>\n",
|
|
" <td>0.012887</td>\n",
|
|
" <td>0.919003</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>72</th>\n",
|
|
" <td>DDCP promo MD pass musées dps oct 2018</td>\n",
|
|
" <td>0.011809</td>\n",
|
|
" <td>0.921327</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>94</th>\n",
|
|
" <td>DDCP rentrée culturelle 2023</td>\n",
|
|
" <td>0.011624</td>\n",
|
|
" <td>0.923614</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>DDCP MD Procès du Siècle</td>\n",
|
|
" <td>0.011141</td>\n",
|
|
" <td>0.925807</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>186</th>\n",
|
|
" <td>ddcp_md_scene_ouverte_au_talent</td>\n",
|
|
" <td>0.010433</td>\n",
|
|
" <td>0.927860</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>108</th>\n",
|
|
" <td>DRE chercheurs</td>\n",
|
|
" <td>0.010300</td>\n",
|
|
" <td>0.929888</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>220</th>\n",
|
|
" <td>festival_jean_rouch</td>\n",
|
|
" <td>0.009937</td>\n",
|
|
" <td>0.931843</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>105</th>\n",
|
|
" <td>DRE Festival Jean Rouch</td>\n",
|
|
" <td>0.009937</td>\n",
|
|
" <td>0.933799</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>275</th>\n",
|
|
" <td>structures_etiquette champ social</td>\n",
|
|
" <td>0.009844</td>\n",
|
|
" <td>0.935736</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>86</th>\n",
|
|
" <td>DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)</td>\n",
|
|
" <td>0.008554</td>\n",
|
|
" <td>0.937420</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>128</th>\n",
|
|
" <td>Inscrits NL jeune public site web</td>\n",
|
|
" <td>0.008263</td>\n",
|
|
" <td>0.939046</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>260</th>\n",
|
|
" <td>rappel po barvalo</td>\n",
|
|
" <td>0.008256</td>\n",
|
|
" <td>0.940671</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>104</th>\n",
|
|
" <td>DDCP_marseille_jazz_2023</td>\n",
|
|
" <td>0.006900</td>\n",
|
|
" <td>0.942029</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32</th>\n",
|
|
" <td>DDCP Newsletter centres de loisirs</td>\n",
|
|
" <td>0.006827</td>\n",
|
|
" <td>0.943373</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>Autres_interet_exposition</td>\n",
|
|
" <td>0.006754</td>\n",
|
|
" <td>0.944702</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>228</th>\n",
|
|
" <td>import_arenametrix_contactstousecardouv_expo</td>\n",
|
|
" <td>0.006212</td>\n",
|
|
" <td>0.945925</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>117</th>\n",
|
|
" <td>Formation clients fidèles</td>\n",
|
|
" <td>0.006047</td>\n",
|
|
" <td>0.947115</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>DDCP Cine 2023</td>\n",
|
|
" <td>0.005656</td>\n",
|
|
" <td>0.948228</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>40</th>\n",
|
|
" <td>DDCP OLBJ! 2023</td>\n",
|
|
" <td>0.005464</td>\n",
|
|
" <td>0.949304</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>240</th>\n",
|
|
" <td>journee-de-l-inclusion_20230601_21h25</td>\n",
|
|
" <td>0.005326</td>\n",
|
|
" <td>0.950352</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>137</th>\n",
|
|
" <td>Questionnaire 2 satisfaction scolaire</td>\n",
|
|
" <td>0.005259</td>\n",
|
|
" <td>0.951387</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>93</th>\n",
|
|
" <td>DDCP rendez-vous de septembre offre spéciale</td>\n",
|
|
" <td>0.005253</td>\n",
|
|
" <td>0.952421</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>135</th>\n",
|
|
" <td>Plan B 2018 (électro)</td>\n",
|
|
" <td>0.005081</td>\n",
|
|
" <td>0.953421</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>270</th>\n",
|
|
" <td>save_the_date_populaire</td>\n",
|
|
" <td>0.004948</td>\n",
|
|
" <td>0.954395</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>132</th>\n",
|
|
" <td>Newsletter CCR (passerelle)</td>\n",
|
|
" <td>0.004783</td>\n",
|
|
" <td>0.955336</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>116</th>\n",
|
|
" <td>Fichier institutionnel (ne pas utiliser sans autorisation)</td>\n",
|
|
" <td>0.004538</td>\n",
|
|
" <td>0.956229</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>222</th>\n",
|
|
" <td>fichier institutionnel_ne_pas_toucher</td>\n",
|
|
" <td>0.004532</td>\n",
|
|
" <td>0.957121</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>266</th>\n",
|
|
" <td>reservations_payees_pass_culture_190422_au_310123</td>\n",
|
|
" <td>0.004492</td>\n",
|
|
" <td>0.958005</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>102</th>\n",
|
|
" <td>DDCP spectateurs Marseille Jazz 18-19-21</td>\n",
|
|
" <td>0.004432</td>\n",
|
|
" <td>0.958878</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>147</th>\n",
|
|
" <td>acid arab</td>\n",
|
|
" <td>0.004413</td>\n",
|
|
" <td>0.959746</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_name \\\n",
|
|
"161 consentement optin mediation specialisee \n",
|
|
"160 consentement optin jeune public \n",
|
|
"158 consentement optin b2c \n",
|
|
"5 Arenametrix_bascule tel vers sib \n",
|
|
"165 consentement optout b2c \n",
|
|
"19 COM Inscrits NL générale (historique) \n",
|
|
"162 consentement optin newsletter generale \n",
|
|
"169 consentement optout newsletter generale \n",
|
|
"170 consentement optout scolaires \n",
|
|
"166 consentement optout dre \n",
|
|
"164 consentement optout b2b \n",
|
|
"126 Inscrits NL générale (export_291019 + operation_videomaton) \n",
|
|
"157 consentement optin b2b \n",
|
|
"216 ddcp_visiteurs dps 010622 \n",
|
|
"20 Contacts_prenomsdoubles \n",
|
|
"115 FORMATION _ acheteurs optin last year \n",
|
|
"214 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
|
|
"189 ddcp_promo_md_musée_dps 011019 \n",
|
|
"188 ddcp_promo_MD_billet_musée_oct_2019_agarder2 \n",
|
|
"163 consentement optin scolaires \n",
|
|
"159 consentement optin dre \n",
|
|
"34 DDCP Newsletter enseignants \n",
|
|
"36 DDCP Newsletter jeune public \n",
|
|
"127 Inscrits NL générale site web \n",
|
|
"145 Votre première liste \n",
|
|
"61 DDCP billets famille \n",
|
|
"106 DRE MucemLab \n",
|
|
"39 DDCP Newsletter relais champ social \n",
|
|
"110 DRE institutionnels \n",
|
|
"48 DDCP PROMO Participants ateliers (adultes et enfants) \n",
|
|
"74 DDCP promo Plan B 2019 (concerts) \n",
|
|
"72 DDCP promo MD pass musées dps oct 2018 \n",
|
|
"94 DDCP rentrée culturelle 2023 \n",
|
|
"23 DDCP MD Procès du Siècle \n",
|
|
"186 ddcp_md_scene_ouverte_au_talent \n",
|
|
"108 DRE chercheurs \n",
|
|
"220 festival_jean_rouch \n",
|
|
"105 DRE Festival Jean Rouch \n",
|
|
"275 structures_etiquette champ social \n",
|
|
"86 DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers) \n",
|
|
"128 Inscrits NL jeune public site web \n",
|
|
"260 rappel po barvalo \n",
|
|
"104 DDCP_marseille_jazz_2023 \n",
|
|
"32 DDCP Newsletter centres de loisirs \n",
|
|
"13 Autres_interet_exposition \n",
|
|
"228 import_arenametrix_contactstousecardouv_expo \n",
|
|
"117 Formation clients fidèles \n",
|
|
"22 DDCP Cine 2023 \n",
|
|
"40 DDCP OLBJ! 2023 \n",
|
|
"240 journee-de-l-inclusion_20230601_21h25 \n",
|
|
"137 Questionnaire 2 satisfaction scolaire \n",
|
|
"93 DDCP rendez-vous de septembre offre spéciale \n",
|
|
"135 Plan B 2018 (électro) \n",
|
|
"270 save_the_date_populaire \n",
|
|
"132 Newsletter CCR (passerelle) \n",
|
|
"116 Fichier institutionnel (ne pas utiliser sans autorisation) \n",
|
|
"222 fichier institutionnel_ne_pas_toucher \n",
|
|
"266 reservations_payees_pass_culture_190422_au_310123 \n",
|
|
"102 DDCP spectateurs Marseille Jazz 18-19-21 \n",
|
|
"147 acid arab \n",
|
|
"\n",
|
|
" customer_id cumulative_customers \n",
|
|
"161 0.992333 0.195306 \n",
|
|
"160 0.992194 0.390585 \n",
|
|
"158 0.720493 0.532390 \n",
|
|
"5 0.232973 0.578242 \n",
|
|
"165 0.228389 0.623193 \n",
|
|
"19 0.152191 0.653146 \n",
|
|
"162 0.146171 0.681915 \n",
|
|
"169 0.124736 0.706465 \n",
|
|
"170 0.104155 0.726964 \n",
|
|
"166 0.094788 0.745620 \n",
|
|
"164 0.094067 0.764134 \n",
|
|
"126 0.093187 0.782474 \n",
|
|
"157 0.084249 0.799056 \n",
|
|
"216 0.081735 0.815142 \n",
|
|
"20 0.077025 0.830302 \n",
|
|
"115 0.069364 0.843954 \n",
|
|
"214 0.043927 0.852600 \n",
|
|
"189 0.039759 0.860425 \n",
|
|
"188 0.036266 0.867563 \n",
|
|
"163 0.032079 0.873876 \n",
|
|
"159 0.029949 0.879771 \n",
|
|
"34 0.029836 0.885643 \n",
|
|
"36 0.025549 0.890671 \n",
|
|
"127 0.024689 0.895531 \n",
|
|
"145 0.024577 0.900368 \n",
|
|
"61 0.023876 0.905067 \n",
|
|
"106 0.015229 0.908064 \n",
|
|
"39 0.015017 0.911020 \n",
|
|
"110 0.014746 0.913922 \n",
|
|
"48 0.012927 0.916466 \n",
|
|
"74 0.012887 0.919003 \n",
|
|
"72 0.011809 0.921327 \n",
|
|
"94 0.011624 0.923614 \n",
|
|
"23 0.011141 0.925807 \n",
|
|
"186 0.010433 0.927860 \n",
|
|
"108 0.010300 0.929888 \n",
|
|
"220 0.009937 0.931843 \n",
|
|
"105 0.009937 0.933799 \n",
|
|
"275 0.009844 0.935736 \n",
|
|
"86 0.008554 0.937420 \n",
|
|
"128 0.008263 0.939046 \n",
|
|
"260 0.008256 0.940671 \n",
|
|
"104 0.006900 0.942029 \n",
|
|
"32 0.006827 0.943373 \n",
|
|
"13 0.006754 0.944702 \n",
|
|
"228 0.006212 0.945925 \n",
|
|
"117 0.006047 0.947115 \n",
|
|
"22 0.005656 0.948228 \n",
|
|
"40 0.005464 0.949304 \n",
|
|
"240 0.005326 0.950352 \n",
|
|
"137 0.005259 0.951387 \n",
|
|
"93 0.005253 0.952421 \n",
|
|
"135 0.005081 0.953421 \n",
|
|
"270 0.004948 0.954395 \n",
|
|
"132 0.004783 0.955336 \n",
|
|
"116 0.004538 0.956229 \n",
|
|
"222 0.004532 0.957121 \n",
|
|
"266 0.004492 0.958005 \n",
|
|
"102 0.004432 0.958878 \n",
|
|
"147 0.004413 0.959746 "
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pd.set_option(\"max_colwidth\", None)\n",
|
|
"print_main_target('1', 60)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "c66a4dc1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option('display.max_rows', None)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "19f3a2dd-ba3d-4dec-8e10-fed544ab6a53",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.reset_option('display.max_rows')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b57a28ac",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print_main_target('2', 25)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9a65991f",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print_main_target('3', 70)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5f34b8bf",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print_main_target('4', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "40fe3676",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.set_option('display.max_rows', None)\n",
|
|
"\n",
|
|
"print_main_target('5', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "820d3600-379b-4245-a977-f1f1fa1f1839",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print_main_target('6', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "86f64a1b-763a-4e43-9601-a38c80392d47",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print_main_target('7', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "fbf2ea42-515a-4cdf-a4c1-50f99c379ed9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print_main_target('8', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "9684045c-4e25-4952-b099-a559baa5d749",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_9/target_information.csv\n",
|
|
"Nombre de ciblage : 1399179\n",
|
|
"Nombre de client avec étiquette target : 181136\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_name</th>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th>cumulative_customers</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>184</th>\n",
|
|
" <td>Run Mate</td>\n",
|
|
" <td>0.999939</td>\n",
|
|
" <td>0.129451</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>233</th>\n",
|
|
" <td>Triathlon</td>\n",
|
|
" <td>0.999934</td>\n",
|
|
" <td>0.258901</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>77</th>\n",
|
|
" <td>HGM</td>\n",
|
|
" <td>0.999934</td>\n",
|
|
" <td>0.388351</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>20km Genève</td>\n",
|
|
" <td>0.999917</td>\n",
|
|
" <td>0.517799</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75</th>\n",
|
|
" <td>GM23</td>\n",
|
|
" <td>0.996561</td>\n",
|
|
" <td>0.646813</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>68</th>\n",
|
|
" <td>GGM - inscrits 2015 - 2023 - FR</td>\n",
|
|
" <td>0.261439</td>\n",
|
|
" <td>0.680658</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>85</th>\n",
|
|
" <td>HGM - Inscrits 2005-2019 - FR</td>\n",
|
|
" <td>0.243110</td>\n",
|
|
" <td>0.712131</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>187</th>\n",
|
|
" <td>RunMate - 500km autour montreux - FR</td>\n",
|
|
" <td>0.236612</td>\n",
|
|
" <td>0.742763</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>188</th>\n",
|
|
" <td>RunMate - 500km autour montreux - FR v2</td>\n",
|
|
" <td>0.204162</td>\n",
|
|
" <td>0.769193</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>46</th>\n",
|
|
" <td>BDD - Semi & Marathon_FR</td>\n",
|
|
" <td>0.169536</td>\n",
|
|
" <td>0.791141</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>83</th>\n",
|
|
" <td>HGM - GM Public - FR</td>\n",
|
|
" <td>0.154420</td>\n",
|
|
" <td>0.811132</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>84</th>\n",
|
|
" <td>HGM - Inscrits 2005-2019 - ENG</td>\n",
|
|
" <td>0.113401</td>\n",
|
|
" <td>0.825813</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>67</th>\n",
|
|
" <td>GGM - inscrits 2015 - 2023 - ENG</td>\n",
|
|
" <td>0.107405</td>\n",
|
|
" <td>0.839717</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>73</th>\n",
|
|
" <td>GGM23 - inscrits - FR</td>\n",
|
|
" <td>0.066734</td>\n",
|
|
" <td>0.848357</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>45</th>\n",
|
|
" <td>BDD - Semi & Marathon_ENG</td>\n",
|
|
" <td>0.063240</td>\n",
|
|
" <td>0.856544</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82</th>\n",
|
|
" <td>HGM - GM Public - ENG</td>\n",
|
|
" <td>0.061893</td>\n",
|
|
" <td>0.864556</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>95</th>\n",
|
|
" <td>LTGT - Inscrits 2008-2019 - FR</td>\n",
|
|
" <td>0.050371</td>\n",
|
|
" <td>0.871077</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>186</th>\n",
|
|
" <td>RunMate - 500km autour montreux - ENG</td>\n",
|
|
" <td>0.042316</td>\n",
|
|
" <td>0.876555</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>210</th>\n",
|
|
" <td>TMB - Opt-in</td>\n",
|
|
" <td>0.039738</td>\n",
|
|
" <td>0.881700</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>118</th>\n",
|
|
" <td>LTGT23-Amateur-FR</td>\n",
|
|
" <td>0.036067</td>\n",
|
|
" <td>0.886369</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>88</th>\n",
|
|
" <td>HGM désinscriptions</td>\n",
|
|
" <td>0.031893</td>\n",
|
|
" <td>0.890498</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>213</th>\n",
|
|
" <td>TMB - anciens participants 2015 - 2020 - FR+ENG</td>\n",
|
|
" <td>0.028145</td>\n",
|
|
" <td>0.894141</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>20km - Inscrits 2017-2019 - FR</td>\n",
|
|
" <td>0.027267</td>\n",
|
|
" <td>0.897671</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>100</th>\n",
|
|
" <td>LTGT - inscrits 2015 - 2020 - FR</td>\n",
|
|
" <td>0.026185</td>\n",
|
|
" <td>0.901061</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>20KM23_Inscrit NL 25.10_FR</td>\n",
|
|
" <td>0.023049</td>\n",
|
|
" <td>0.904045</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>185</th>\n",
|
|
" <td>Run Mate désinscriptions</td>\n",
|
|
" <td>0.021746</td>\n",
|
|
" <td>0.906860</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>303</th>\n",
|
|
" <td>test listes</td>\n",
|
|
" <td>0.021321</td>\n",
|
|
" <td>0.909621</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>72</th>\n",
|
|
" <td>GGM23 - inscrits - ENG</td>\n",
|
|
" <td>0.021304</td>\n",
|
|
" <td>0.912379</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>20KM23_FINISHER_FR2</td>\n",
|
|
" <td>0.020316</td>\n",
|
|
" <td>0.915009</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>20KM23_FINISHER_FR</td>\n",
|
|
" <td>0.020316</td>\n",
|
|
" <td>0.917639</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>38</th>\n",
|
|
" <td>20km23_nl finisher fr</td>\n",
|
|
" <td>0.020316</td>\n",
|
|
" <td>0.920269</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>107</th>\n",
|
|
" <td>LTGT21 - Inscrits FR+ENG</td>\n",
|
|
" <td>0.019058</td>\n",
|
|
" <td>0.922736</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>33</th>\n",
|
|
" <td>20km23_inscrits fr 05.10_test</td>\n",
|
|
" <td>0.017445</td>\n",
|
|
" <td>0.924995</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>20km Genève désinscriptions</td>\n",
|
|
" <td>0.017390</td>\n",
|
|
" <td>0.927246</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>113</th>\n",
|
|
" <td>LTGT22 - inscris FR+ENG</td>\n",
|
|
" <td>0.016601</td>\n",
|
|
" <td>0.929395</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>47</th>\n",
|
|
" <td>Bénévoles Mass Events</td>\n",
|
|
" <td>0.016501</td>\n",
|
|
" <td>0.931531</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32</th>\n",
|
|
" <td>20km23_inscrits fr 02.10</td>\n",
|
|
" <td>0.016479</td>\n",
|
|
" <td>0.933665</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>64</th>\n",
|
|
" <td>GGM - Contact PT OK</td>\n",
|
|
" <td>0.016065</td>\n",
|
|
" <td>0.935744</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>112</th>\n",
|
|
" <td>LTGT22 - Anciens participants Half, Half relais, standard, standard relais - FR</td>\n",
|
|
" <td>0.015425</td>\n",
|
|
" <td>0.937741</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>225</th>\n",
|
|
" <td>TMB23-Amateur-FR</td>\n",
|
|
" <td>0.013349</td>\n",
|
|
" <td>0.939470</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>110</th>\n",
|
|
" <td>LTGT21 - inscrits FR</td>\n",
|
|
" <td>0.012642</td>\n",
|
|
" <td>0.941106</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>219</th>\n",
|
|
" <td>TMB22 - inscrits FR</td>\n",
|
|
" <td>0.010732</td>\n",
|
|
" <td>0.942496</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>94</th>\n",
|
|
" <td>LTGT - Inscrits 2008-2019 - ENG</td>\n",
|
|
" <td>0.010688</td>\n",
|
|
" <td>0.943879</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>115</th>\n",
|
|
" <td>LTGT22 - inscrits FR</td>\n",
|
|
" <td>0.010368</td>\n",
|
|
" <td>0.945221</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>203</th>\n",
|
|
" <td>TIMB22 - incritS FR+ENG</td>\n",
|
|
" <td>0.010064</td>\n",
|
|
" <td>0.946524</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>20km22 - inscrits - FR</td>\n",
|
|
" <td>0.010009</td>\n",
|
|
" <td>0.947820</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>20km20 - Inscrits finaux - FR</td>\n",
|
|
" <td>0.009518</td>\n",
|
|
" <td>0.949052</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>20KM21 - inscrits FR</td>\n",
|
|
" <td>0.009507</td>\n",
|
|
" <td>0.950283</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>20KM - Contact PT OKb</td>\n",
|
|
" <td>0.009451</td>\n",
|
|
" <td>0.951507</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>175</th>\n",
|
|
" <td>RM21- Inscrits FR+ENG</td>\n",
|
|
" <td>0.009098</td>\n",
|
|
" <td>0.952684</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>222</th>\n",
|
|
" <td>TMB23 - inscrits FR + ENG</td>\n",
|
|
" <td>0.008518</td>\n",
|
|
" <td>0.953787</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>223</th>\n",
|
|
" <td>TMB23 - inscrits FR + ENG -</td>\n",
|
|
" <td>0.008518</td>\n",
|
|
" <td>0.954890</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>304</th>\n",
|
|
" <td>tmb - pro - fr - VF</td>\n",
|
|
" <td>0.008298</td>\n",
|
|
" <td>0.955964</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>221</th>\n",
|
|
" <td>TMB23 - inscrits FR</td>\n",
|
|
" <td>0.007834</td>\n",
|
|
" <td>0.956978</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>105</th>\n",
|
|
" <td>LTGT20 - Inscrits - FR</td>\n",
|
|
" <td>0.007746</td>\n",
|
|
" <td>0.957981</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>234</th>\n",
|
|
" <td>Triathlon désinscriptions</td>\n",
|
|
" <td>0.007740</td>\n",
|
|
" <td>0.958983</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>238</th>\n",
|
|
" <td>ggm23 - inscrits fr + eng</td>\n",
|
|
" <td>0.007276</td>\n",
|
|
" <td>0.959925</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>117</th>\n",
|
|
" <td>LTGT23-Amateur-ENG- vf</td>\n",
|
|
" <td>0.007166</td>\n",
|
|
" <td>0.960853</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>116</th>\n",
|
|
" <td>LTGT23-Amateur-ENG</td>\n",
|
|
" <td>0.007166</td>\n",
|
|
" <td>0.961780</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>173</th>\n",
|
|
" <td>RM21 - inscrits FR (reportés + new)</td>\n",
|
|
" <td>0.007160</td>\n",
|
|
" <td>0.962707</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>197</th>\n",
|
|
" <td>TE23_Inscrits_FR</td>\n",
|
|
" <td>0.007116</td>\n",
|
|
" <td>0.963629</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>153</th>\n",
|
|
" <td>RM - Contact PT OK</td>\n",
|
|
" <td>0.006967</td>\n",
|
|
" <td>0.964531</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>177</th>\n",
|
|
" <td>RM22 - inscrits FR</td>\n",
|
|
" <td>0.006879</td>\n",
|
|
" <td>0.965421</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>35</th>\n",
|
|
" <td>20km23_inscrits nl 1 fr - 17.08</td>\n",
|
|
" <td>0.006879</td>\n",
|
|
" <td>0.966312</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>20KM23_Inscrits NL 1 FR - 17.08</td>\n",
|
|
" <td>0.006879</td>\n",
|
|
" <td>0.967202</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>97</th>\n",
|
|
" <td>LTGT - Inscrits NL - FR</td>\n",
|
|
" <td>0.006669</td>\n",
|
|
" <td>0.968066</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>166</th>\n",
|
|
" <td>RM20 - inscrits 2020 (Total avec reports) - FR</td>\n",
|
|
" <td>0.006592</td>\n",
|
|
" <td>0.968919</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>297</th>\n",
|
|
" <td>rm23_nl finisher fr</td>\n",
|
|
" <td>0.006117</td>\n",
|
|
" <td>0.969711</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>180</th>\n",
|
|
" <td>RM23_NL Finisher FR</td>\n",
|
|
" <td>0.006117</td>\n",
|
|
" <td>0.970503</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>162</th>\n",
|
|
" <td>RM19 - Inscrits - FR</td>\n",
|
|
" <td>0.006106</td>\n",
|
|
" <td>0.971293</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>292</th>\n",
|
|
" <td>rm23_inscrits nl 19.09 fr</td>\n",
|
|
" <td>0.006089</td>\n",
|
|
" <td>0.972081</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>66</th>\n",
|
|
" <td>GGM - Inscrits NL - FR</td>\n",
|
|
" <td>0.006023</td>\n",
|
|
" <td>0.972861</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>93</th>\n",
|
|
" <td>LTGT - Contact PT OK</td>\n",
|
|
" <td>0.005973</td>\n",
|
|
" <td>0.973635</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>174</th>\n",
|
|
" <td>RM21 - new inscrits - FR</td>\n",
|
|
" <td>0.005808</td>\n",
|
|
" <td>0.974386</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>20km - Inscrits 2017-2019 - ENG</td>\n",
|
|
" <td>0.005543</td>\n",
|
|
" <td>0.975104</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>42</th>\n",
|
|
" <td>20km23_no show fr</td>\n",
|
|
" <td>0.005443</td>\n",
|
|
" <td>0.975809</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>20KM23_NO SHOW_FR</td>\n",
|
|
" <td>0.005443</td>\n",
|
|
" <td>0.976513</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>101</th>\n",
|
|
" <td>LTGT - inscrits 2015-2020 - ENG</td>\n",
|
|
" <td>0.005405</td>\n",
|
|
" <td>0.977213</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>20km - Inscrits NL - FR</td>\n",
|
|
" <td>0.005377</td>\n",
|
|
" <td>0.977909</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>109</th>\n",
|
|
" <td>LTGT21 - Reportés - FR</td>\n",
|
|
" <td>0.005366</td>\n",
|
|
" <td>0.978604</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>214</th>\n",
|
|
" <td>TMB - anciens participants Standard et HALF</td>\n",
|
|
" <td>0.004924</td>\n",
|
|
" <td>0.979241</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>242</th>\n",
|
|
" <td>ltgt - pro - fr vf</td>\n",
|
|
" <td>0.004798</td>\n",
|
|
" <td>0.979862</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>36</th>\n",
|
|
" <td>20km23_nl 3 26.10 eng</td>\n",
|
|
" <td>0.004720</td>\n",
|
|
" <td>0.980474</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>28</th>\n",
|
|
" <td>20km22 - inscrits - ANG</td>\n",
|
|
" <td>0.004599</td>\n",
|
|
" <td>0.981069</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>74</th>\n",
|
|
" <td>GGM24_INSCRIS FR&ENG</td>\n",
|
|
" <td>0.004245</td>\n",
|
|
" <td>0.981619</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>239</th>\n",
|
|
" <td>ggm24_inscrits 05.10</td>\n",
|
|
" <td>0.004245</td>\n",
|
|
" <td>0.982168</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>62</th>\n",
|
|
" <td>Entreprises - FR</td>\n",
|
|
" <td>0.004091</td>\n",
|
|
" <td>0.982698</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>290</th>\n",
|
|
" <td>rm23_inscrits fr nl2 21.07</td>\n",
|
|
" <td>0.004003</td>\n",
|
|
" <td>0.983216</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>208</th>\n",
|
|
" <td>TMB - Contact PT OK</td>\n",
|
|
" <td>0.003771</td>\n",
|
|
" <td>0.983704</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>20km23_inscrits eng 02.10</td>\n",
|
|
" <td>0.003699</td>\n",
|
|
" <td>0.984183</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>37</th>\n",
|
|
" <td>20km23_nl finisher eng</td>\n",
|
|
" <td>0.003588</td>\n",
|
|
" <td>0.984647</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>20KM23_FINISHER_ENG</td>\n",
|
|
" <td>0.003588</td>\n",
|
|
" <td>0.985112</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>111</th>\n",
|
|
" <td>LTGT22 - Anciens participants Half, Half relais, standard, standard relais - EN</td>\n",
|
|
" <td>0.003555</td>\n",
|
|
" <td>0.985572</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>207</th>\n",
|
|
" <td>TIMB23 - liste d'attente - FR</td>\n",
|
|
" <td>0.003307</td>\n",
|
|
" <td>0.986000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>205</th>\n",
|
|
" <td>TIMB22 - reports FR+EN - VF</td>\n",
|
|
" <td>0.003058</td>\n",
|
|
" <td>0.986396</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>165</th>\n",
|
|
" <td>RM20 - Inscrits sur liste d'attente - FR</td>\n",
|
|
" <td>0.003058</td>\n",
|
|
" <td>0.986792</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>106</th>\n",
|
|
" <td>LTGT21 - Inscrits ENG</td>\n",
|
|
" <td>0.003031</td>\n",
|
|
" <td>0.987185</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>216</th>\n",
|
|
" <td>TMB21 - Liste d'attente - FR</td>\n",
|
|
" <td>0.002992</td>\n",
|
|
" <td>0.987572</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>202</th>\n",
|
|
" <td>TIMB21 - liste d'attente - FR</td>\n",
|
|
" <td>0.002992</td>\n",
|
|
" <td>0.987959</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>245</th>\n",
|
|
" <td>ltgt23 - non half eng</td>\n",
|
|
" <td>0.002871</td>\n",
|
|
" <td>0.988331</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_name \\\n",
|
|
"184 Run Mate \n",
|
|
"233 Triathlon \n",
|
|
"77 HGM \n",
|
|
"24 20km Genève \n",
|
|
"75 GM23 \n",
|
|
"68 GGM - inscrits 2015 - 2023 - FR \n",
|
|
"85 HGM - Inscrits 2005-2019 - FR \n",
|
|
"187 RunMate - 500km autour montreux - FR \n",
|
|
"188 RunMate - 500km autour montreux - FR v2 \n",
|
|
"46 BDD - Semi & Marathon_FR \n",
|
|
"83 HGM - GM Public - FR \n",
|
|
"84 HGM - Inscrits 2005-2019 - ENG \n",
|
|
"67 GGM - inscrits 2015 - 2023 - ENG \n",
|
|
"73 GGM23 - inscrits - FR \n",
|
|
"45 BDD - Semi & Marathon_ENG \n",
|
|
"82 HGM - GM Public - ENG \n",
|
|
"95 LTGT - Inscrits 2008-2019 - FR \n",
|
|
"186 RunMate - 500km autour montreux - ENG \n",
|
|
"210 TMB - Opt-in \n",
|
|
"118 LTGT23-Amateur-FR \n",
|
|
"88 HGM désinscriptions \n",
|
|
"213 TMB - anciens participants 2015 - 2020 - FR+ENG \n",
|
|
"19 20km - Inscrits 2017-2019 - FR \n",
|
|
"100 LTGT - inscrits 2015 - 2020 - FR \n",
|
|
"10 20KM23_Inscrit NL 25.10_FR \n",
|
|
"185 Run Mate désinscriptions \n",
|
|
"303 test listes \n",
|
|
"72 GGM23 - inscrits - ENG \n",
|
|
"9 20KM23_FINISHER_FR2 \n",
|
|
"8 20KM23_FINISHER_FR \n",
|
|
"38 20km23_nl finisher fr \n",
|
|
"107 LTGT21 - Inscrits FR+ENG \n",
|
|
"33 20km23_inscrits fr 05.10_test \n",
|
|
"25 20km Genève désinscriptions \n",
|
|
"113 LTGT22 - inscris FR+ENG \n",
|
|
"47 Bénévoles Mass Events \n",
|
|
"32 20km23_inscrits fr 02.10 \n",
|
|
"64 GGM - Contact PT OK \n",
|
|
"112 LTGT22 - Anciens participants Half, Half relais, standard, standard relais - FR \n",
|
|
"225 TMB23-Amateur-FR \n",
|
|
"110 LTGT21 - inscrits FR \n",
|
|
"219 TMB22 - inscrits FR \n",
|
|
"94 LTGT - Inscrits 2008-2019 - ENG \n",
|
|
"115 LTGT22 - inscrits FR \n",
|
|
"203 TIMB22 - incritS FR+ENG \n",
|
|
"29 20km22 - inscrits - FR \n",
|
|
"27 20km20 - Inscrits finaux - FR \n",
|
|
"4 20KM21 - inscrits FR \n",
|
|
"2 20KM - Contact PT OKb \n",
|
|
"175 RM21- Inscrits FR+ENG \n",
|
|
"222 TMB23 - inscrits FR + ENG \n",
|
|
"223 TMB23 - inscrits FR + ENG - \n",
|
|
"304 tmb - pro - fr - VF \n",
|
|
"221 TMB23 - inscrits FR \n",
|
|
"105 LTGT20 - Inscrits - FR \n",
|
|
"234 Triathlon désinscriptions \n",
|
|
"238 ggm23 - inscrits fr + eng \n",
|
|
"117 LTGT23-Amateur-ENG- vf \n",
|
|
"116 LTGT23-Amateur-ENG \n",
|
|
"173 RM21 - inscrits FR (reportés + new) \n",
|
|
"197 TE23_Inscrits_FR \n",
|
|
"153 RM - Contact PT OK \n",
|
|
"177 RM22 - inscrits FR \n",
|
|
"35 20km23_inscrits nl 1 fr - 17.08 \n",
|
|
"12 20KM23_Inscrits NL 1 FR - 17.08 \n",
|
|
"97 LTGT - Inscrits NL - FR \n",
|
|
"166 RM20 - inscrits 2020 (Total avec reports) - FR \n",
|
|
"297 rm23_nl finisher fr \n",
|
|
"180 RM23_NL Finisher FR \n",
|
|
"162 RM19 - Inscrits - FR \n",
|
|
"292 rm23_inscrits nl 19.09 fr \n",
|
|
"66 GGM - Inscrits NL - FR \n",
|
|
"93 LTGT - Contact PT OK \n",
|
|
"174 RM21 - new inscrits - FR \n",
|
|
"18 20km - Inscrits 2017-2019 - ENG \n",
|
|
"42 20km23_no show fr \n",
|
|
"16 20KM23_NO SHOW_FR \n",
|
|
"101 LTGT - inscrits 2015-2020 - ENG \n",
|
|
"21 20km - Inscrits NL - FR \n",
|
|
"109 LTGT21 - Reportés - FR \n",
|
|
"214 TMB - anciens participants Standard et HALF \n",
|
|
"242 ltgt - pro - fr vf \n",
|
|
"36 20km23_nl 3 26.10 eng \n",
|
|
"28 20km22 - inscrits - ANG \n",
|
|
"74 GGM24_INSCRIS FR&ENG \n",
|
|
"239 ggm24_inscrits 05.10 \n",
|
|
"62 Entreprises - FR \n",
|
|
"290 rm23_inscrits fr nl2 21.07 \n",
|
|
"208 TMB - Contact PT OK \n",
|
|
"31 20km23_inscrits eng 02.10 \n",
|
|
"37 20km23_nl finisher eng \n",
|
|
"7 20KM23_FINISHER_ENG \n",
|
|
"111 LTGT22 - Anciens participants Half, Half relais, standard, standard relais - EN \n",
|
|
"207 TIMB23 - liste d'attente - FR \n",
|
|
"205 TIMB22 - reports FR+EN - VF \n",
|
|
"165 RM20 - Inscrits sur liste d'attente - FR \n",
|
|
"106 LTGT21 - Inscrits ENG \n",
|
|
"216 TMB21 - Liste d'attente - FR \n",
|
|
"202 TIMB21 - liste d'attente - FR \n",
|
|
"245 ltgt23 - non half eng \n",
|
|
"\n",
|
|
" customer_id cumulative_customers \n",
|
|
"184 0.999939 0.129451 \n",
|
|
"233 0.999934 0.258901 \n",
|
|
"77 0.999934 0.388351 \n",
|
|
"24 0.999917 0.517799 \n",
|
|
"75 0.996561 0.646813 \n",
|
|
"68 0.261439 0.680658 \n",
|
|
"85 0.243110 0.712131 \n",
|
|
"187 0.236612 0.742763 \n",
|
|
"188 0.204162 0.769193 \n",
|
|
"46 0.169536 0.791141 \n",
|
|
"83 0.154420 0.811132 \n",
|
|
"84 0.113401 0.825813 \n",
|
|
"67 0.107405 0.839717 \n",
|
|
"73 0.066734 0.848357 \n",
|
|
"45 0.063240 0.856544 \n",
|
|
"82 0.061893 0.864556 \n",
|
|
"95 0.050371 0.871077 \n",
|
|
"186 0.042316 0.876555 \n",
|
|
"210 0.039738 0.881700 \n",
|
|
"118 0.036067 0.886369 \n",
|
|
"88 0.031893 0.890498 \n",
|
|
"213 0.028145 0.894141 \n",
|
|
"19 0.027267 0.897671 \n",
|
|
"100 0.026185 0.901061 \n",
|
|
"10 0.023049 0.904045 \n",
|
|
"185 0.021746 0.906860 \n",
|
|
"303 0.021321 0.909621 \n",
|
|
"72 0.021304 0.912379 \n",
|
|
"9 0.020316 0.915009 \n",
|
|
"8 0.020316 0.917639 \n",
|
|
"38 0.020316 0.920269 \n",
|
|
"107 0.019058 0.922736 \n",
|
|
"33 0.017445 0.924995 \n",
|
|
"25 0.017390 0.927246 \n",
|
|
"113 0.016601 0.929395 \n",
|
|
"47 0.016501 0.931531 \n",
|
|
"32 0.016479 0.933665 \n",
|
|
"64 0.016065 0.935744 \n",
|
|
"112 0.015425 0.937741 \n",
|
|
"225 0.013349 0.939470 \n",
|
|
"110 0.012642 0.941106 \n",
|
|
"219 0.010732 0.942496 \n",
|
|
"94 0.010688 0.943879 \n",
|
|
"115 0.010368 0.945221 \n",
|
|
"203 0.010064 0.946524 \n",
|
|
"29 0.010009 0.947820 \n",
|
|
"27 0.009518 0.949052 \n",
|
|
"4 0.009507 0.950283 \n",
|
|
"2 0.009451 0.951507 \n",
|
|
"175 0.009098 0.952684 \n",
|
|
"222 0.008518 0.953787 \n",
|
|
"223 0.008518 0.954890 \n",
|
|
"304 0.008298 0.955964 \n",
|
|
"221 0.007834 0.956978 \n",
|
|
"105 0.007746 0.957981 \n",
|
|
"234 0.007740 0.958983 \n",
|
|
"238 0.007276 0.959925 \n",
|
|
"117 0.007166 0.960853 \n",
|
|
"116 0.007166 0.961780 \n",
|
|
"173 0.007160 0.962707 \n",
|
|
"197 0.007116 0.963629 \n",
|
|
"153 0.006967 0.964531 \n",
|
|
"177 0.006879 0.965421 \n",
|
|
"35 0.006879 0.966312 \n",
|
|
"12 0.006879 0.967202 \n",
|
|
"97 0.006669 0.968066 \n",
|
|
"166 0.006592 0.968919 \n",
|
|
"297 0.006117 0.969711 \n",
|
|
"180 0.006117 0.970503 \n",
|
|
"162 0.006106 0.971293 \n",
|
|
"292 0.006089 0.972081 \n",
|
|
"66 0.006023 0.972861 \n",
|
|
"93 0.005973 0.973635 \n",
|
|
"174 0.005808 0.974386 \n",
|
|
"18 0.005543 0.975104 \n",
|
|
"42 0.005443 0.975809 \n",
|
|
"16 0.005443 0.976513 \n",
|
|
"101 0.005405 0.977213 \n",
|
|
"21 0.005377 0.977909 \n",
|
|
"109 0.005366 0.978604 \n",
|
|
"214 0.004924 0.979241 \n",
|
|
"242 0.004798 0.979862 \n",
|
|
"36 0.004720 0.980474 \n",
|
|
"28 0.004599 0.981069 \n",
|
|
"74 0.004245 0.981619 \n",
|
|
"239 0.004245 0.982168 \n",
|
|
"62 0.004091 0.982698 \n",
|
|
"290 0.004003 0.983216 \n",
|
|
"208 0.003771 0.983704 \n",
|
|
"31 0.003699 0.984183 \n",
|
|
"37 0.003588 0.984647 \n",
|
|
"7 0.003588 0.985112 \n",
|
|
"111 0.003555 0.985572 \n",
|
|
"207 0.003307 0.986000 \n",
|
|
"205 0.003058 0.986396 \n",
|
|
"165 0.003058 0.986792 \n",
|
|
"106 0.003031 0.987185 \n",
|
|
"216 0.002992 0.987572 \n",
|
|
"202 0.002992 0.987959 \n",
|
|
"245 0.002871 0.988331 "
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"print_main_target('9', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "cf8f7816-e7f3-4b7a-a987-8350a76eb140",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n",
|
|
"Nombre de ciblage : 69258\n",
|
|
"Nombre de client avec étiquette target : 53639\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_name</th>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th>cumulative_customers</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>Newsletter mensuelle</td>\n",
|
|
" <td>0.722068</td>\n",
|
|
" <td>0.559228</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Blackliste</td>\n",
|
|
" <td>0.188669</td>\n",
|
|
" <td>0.705348</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>36</th>\n",
|
|
" <td>import opt-in février 2023</td>\n",
|
|
" <td>0.068141</td>\n",
|
|
" <td>0.758122</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>37</th>\n",
|
|
" <td>import opt-out fév 23</td>\n",
|
|
" <td>0.055016</td>\n",
|
|
" <td>0.800731</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Contacts opt-out 20.10.22</td>\n",
|
|
" <td>0.047726</td>\n",
|
|
" <td>0.837694</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>Théâtre amateur</td>\n",
|
|
" <td>0.033054</td>\n",
|
|
" <td>0.863294</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>41</th>\n",
|
|
" <td>liste théâtre amateur</td>\n",
|
|
" <td>0.026641</td>\n",
|
|
" <td>0.883927</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>CP EUH 7 octobre 2022</td>\n",
|
|
" <td>0.026492</td>\n",
|
|
" <td>0.904444</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>47</th>\n",
|
|
" <td>spec Falaise - relance Mazut</td>\n",
|
|
" <td>0.020452</td>\n",
|
|
" <td>0.920284</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>35</th>\n",
|
|
" <td>fichierspectateurs_recreatrales</td>\n",
|
|
" <td>0.016052</td>\n",
|
|
" <td>0.932715</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>fichier news quartier grand t</td>\n",
|
|
" <td>0.007644</td>\n",
|
|
" <td>0.938635</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>33</th>\n",
|
|
" <td>fichier2 news quartier grand t</td>\n",
|
|
" <td>0.007383</td>\n",
|
|
" <td>0.944353</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>contacts amateurs 22-23 ok ok</td>\n",
|
|
" <td>0.006842</td>\n",
|
|
" <td>0.949652</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>amateurs incandescences oct 22</td>\n",
|
|
" <td>0.006730</td>\n",
|
|
" <td>0.954864</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>fichier invit Soir de fête</td>\n",
|
|
" <td>0.005984</td>\n",
|
|
" <td>0.959499</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>Spectateurs Ce qu'il faut dire</td>\n",
|
|
" <td>0.004959</td>\n",
|
|
" <td>0.963340</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>49</th>\n",
|
|
" <td>spec Mazut - tout</td>\n",
|
|
" <td>0.003300</td>\n",
|
|
" <td>0.965896</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>26</th>\n",
|
|
" <td>fichier invitation déjeuner chantier le 8 juin</td>\n",
|
|
" <td>0.003151</td>\n",
|
|
" <td>0.968336</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>3 M - spec du dimanche (toutes rep)</td>\n",
|
|
" <td>0.003132</td>\n",
|
|
" <td>0.970762</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>fichier op les dodos 21 nov global</td>\n",
|
|
" <td>0.003020</td>\n",
|
|
" <td>0.973101</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>Invitation protocolaire Les Fauves</td>\n",
|
|
" <td>0.002573</td>\n",
|
|
" <td>0.975093</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>3 M - spec du samedi (toutes rep)</td>\n",
|
|
" <td>0.002554</td>\n",
|
|
" <td>0.977071</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>28</th>\n",
|
|
" <td>fichier jumelage halveque</td>\n",
|
|
" <td>0.002498</td>\n",
|
|
" <td>0.979006</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>fichier invitations pro les enfants d'amazi</td>\n",
|
|
" <td>0.002256</td>\n",
|
|
" <td>0.980753</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>Spectateurs Acte(s) et sueurs</td>\n",
|
|
" <td>0.002237</td>\n",
|
|
" <td>0.982486</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>Presse 2021</td>\n",
|
|
" <td>0.002200</td>\n",
|
|
" <td>0.984190</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>Spectateurs-rices SpaceSongs</td>\n",
|
|
" <td>0.002069</td>\n",
|
|
" <td>0.985792</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>Presse Tous Terriens</td>\n",
|
|
" <td>0.002069</td>\n",
|
|
" <td>0.987395</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>46</th>\n",
|
|
" <td>nouveaux acheteurs 23-24 - relance Mazut</td>\n",
|
|
" <td>0.002032</td>\n",
|
|
" <td>0.988969</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>Presse communqiué agglo</td>\n",
|
|
" <td>0.001920</td>\n",
|
|
" <td>0.990456</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>Presse 22-23</td>\n",
|
|
" <td>0.001883</td>\n",
|
|
" <td>0.991914</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>fichier presse grand t 23-24export</td>\n",
|
|
" <td>0.001864</td>\n",
|
|
" <td>0.993358</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Bottière - jumelage (contacts proches projet</td>\n",
|
|
" <td>0.001566</td>\n",
|
|
" <td>0.994571</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>P6S</td>\n",
|
|
" <td>0.001286</td>\n",
|
|
" <td>0.995567</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>43</th>\n",
|
|
" <td>mails permanents gd t fev23</td>\n",
|
|
" <td>0.000932</td>\n",
|
|
" <td>0.996289</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>34</th>\n",
|
|
" <td>fichiercontactstous_enssup_relance étudiants</td>\n",
|
|
" <td>0.000820</td>\n",
|
|
" <td>0.996925</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>42</th>\n",
|
|
" <td>mails intervenants mdla fev23</td>\n",
|
|
" <td>0.000522</td>\n",
|
|
" <td>0.997329</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>48</th>\n",
|
|
" <td>spec Jessica</td>\n",
|
|
" <td>0.000522</td>\n",
|
|
" <td>0.997733</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Invit conf presse EUH : artistes + DRAC</td>\n",
|
|
" <td>0.000466</td>\n",
|
|
" <td>0.998094</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>P2S</td>\n",
|
|
" <td>0.000392</td>\n",
|
|
" <td>0.998397</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>39</th>\n",
|
|
" <td>liste intervenants mdla_newsletter temuda</td>\n",
|
|
" <td>0.000392</td>\n",
|
|
" <td>0.998701</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>Personnel du Grand T</td>\n",
|
|
" <td>0.000354</td>\n",
|
|
" <td>0.998975</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>44</th>\n",
|
|
" <td>mails permanents mdla fev23</td>\n",
|
|
" <td>0.000336</td>\n",
|
|
" <td>0.999235</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32</th>\n",
|
|
" <td>fichier-sante-social-terminato</td>\n",
|
|
" <td>0.000317</td>\n",
|
|
" <td>0.999480</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>40</th>\n",
|
|
" <td>liste permanents mdla_newsletter temuda</td>\n",
|
|
" <td>0.000317</td>\n",
|
|
" <td>0.999726</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>Presse éducation</td>\n",
|
|
" <td>0.000131</td>\n",
|
|
" <td>0.999827</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>38</th>\n",
|
|
" <td>liste billetterie</td>\n",
|
|
" <td>0.000056</td>\n",
|
|
" <td>0.999870</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>Equipe com Grand T</td>\n",
|
|
" <td>0.000056</td>\n",
|
|
" <td>0.999913</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>45</th>\n",
|
|
" <td>news jumelage Bottière ajouts</td>\n",
|
|
" <td>0.000056</td>\n",
|
|
" <td>0.999957</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>emails yohann et tiphaine</td>\n",
|
|
" <td>0.000056</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_name customer_id \\\n",
|
|
"9 Newsletter mensuelle 0.722068 \n",
|
|
"2 Blackliste 0.188669 \n",
|
|
"36 import opt-in février 2023 0.068141 \n",
|
|
"37 import opt-out fév 23 0.055016 \n",
|
|
"5 Contacts opt-out 20.10.22 0.047726 \n",
|
|
"21 Théâtre amateur 0.033054 \n",
|
|
"41 liste théâtre amateur 0.026641 \n",
|
|
"4 CP EUH 7 octobre 2022 0.026492 \n",
|
|
"47 spec Falaise - relance Mazut 0.020452 \n",
|
|
"35 fichierspectateurs_recreatrales 0.016052 \n",
|
|
"29 fichier news quartier grand t 0.007644 \n",
|
|
"33 fichier2 news quartier grand t 0.007383 \n",
|
|
"23 contacts amateurs 22-23 ok ok 0.006842 \n",
|
|
"22 amateurs incandescences oct 22 0.006730 \n",
|
|
"25 fichier invit Soir de fête 0.005984 \n",
|
|
"19 Spectateurs Ce qu'il faut dire 0.004959 \n",
|
|
"49 spec Mazut - tout 0.003300 \n",
|
|
"26 fichier invitation déjeuner chantier le 8 juin 0.003151 \n",
|
|
"0 3 M - spec du dimanche (toutes rep) 0.003132 \n",
|
|
"30 fichier op les dodos 21 nov global 0.003020 \n",
|
|
"8 Invitation protocolaire Les Fauves 0.002573 \n",
|
|
"1 3 M - spec du samedi (toutes rep) 0.002554 \n",
|
|
"28 fichier jumelage halveque 0.002498 \n",
|
|
"27 fichier invitations pro les enfants d'amazi 0.002256 \n",
|
|
"18 Spectateurs Acte(s) et sueurs 0.002237 \n",
|
|
"13 Presse 2021 0.002200 \n",
|
|
"20 Spectateurs-rices SpaceSongs 0.002069 \n",
|
|
"15 Presse Tous Terriens 0.002069 \n",
|
|
"46 nouveaux acheteurs 23-24 - relance Mazut 0.002032 \n",
|
|
"16 Presse communqiué agglo 0.001920 \n",
|
|
"14 Presse 22-23 0.001883 \n",
|
|
"31 fichier presse grand t 23-24export 0.001864 \n",
|
|
"3 Bottière - jumelage (contacts proches projet 0.001566 \n",
|
|
"11 P6S 0.001286 \n",
|
|
"43 mails permanents gd t fev23 0.000932 \n",
|
|
"34 fichiercontactstous_enssup_relance étudiants 0.000820 \n",
|
|
"42 mails intervenants mdla fev23 0.000522 \n",
|
|
"48 spec Jessica 0.000522 \n",
|
|
"7 Invit conf presse EUH : artistes + DRAC 0.000466 \n",
|
|
"10 P2S 0.000392 \n",
|
|
"39 liste intervenants mdla_newsletter temuda 0.000392 \n",
|
|
"12 Personnel du Grand T 0.000354 \n",
|
|
"44 mails permanents mdla fev23 0.000336 \n",
|
|
"32 fichier-sante-social-terminato 0.000317 \n",
|
|
"40 liste permanents mdla_newsletter temuda 0.000317 \n",
|
|
"17 Presse éducation 0.000131 \n",
|
|
"38 liste billetterie 0.000056 \n",
|
|
"6 Equipe com Grand T 0.000056 \n",
|
|
"45 news jumelage Bottière ajouts 0.000056 \n",
|
|
"24 emails yohann et tiphaine 0.000056 \n",
|
|
"\n",
|
|
" cumulative_customers \n",
|
|
"9 0.559228 \n",
|
|
"2 0.705348 \n",
|
|
"36 0.758122 \n",
|
|
"37 0.800731 \n",
|
|
"5 0.837694 \n",
|
|
"21 0.863294 \n",
|
|
"41 0.883927 \n",
|
|
"4 0.904444 \n",
|
|
"47 0.920284 \n",
|
|
"35 0.932715 \n",
|
|
"29 0.938635 \n",
|
|
"33 0.944353 \n",
|
|
"23 0.949652 \n",
|
|
"22 0.954864 \n",
|
|
"25 0.959499 \n",
|
|
"19 0.963340 \n",
|
|
"49 0.965896 \n",
|
|
"26 0.968336 \n",
|
|
"0 0.970762 \n",
|
|
"30 0.973101 \n",
|
|
"8 0.975093 \n",
|
|
"1 0.977071 \n",
|
|
"28 0.979006 \n",
|
|
"27 0.980753 \n",
|
|
"18 0.982486 \n",
|
|
"13 0.984190 \n",
|
|
"20 0.985792 \n",
|
|
"15 0.987395 \n",
|
|
"46 0.988969 \n",
|
|
"16 0.990456 \n",
|
|
"14 0.991914 \n",
|
|
"31 0.993358 \n",
|
|
"3 0.994571 \n",
|
|
"11 0.995567 \n",
|
|
"43 0.996289 \n",
|
|
"34 0.996925 \n",
|
|
"42 0.997329 \n",
|
|
"48 0.997733 \n",
|
|
"7 0.998094 \n",
|
|
"10 0.998397 \n",
|
|
"39 0.998701 \n",
|
|
"12 0.998975 \n",
|
|
"44 0.999235 \n",
|
|
"32 0.999480 \n",
|
|
"40 0.999726 \n",
|
|
"17 0.999827 \n",
|
|
"38 0.999870 \n",
|
|
"6 0.999913 \n",
|
|
"45 0.999957 \n",
|
|
"24 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"print_main_target('10', 100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "76c818a5-3c52-4d97-ac81-b7f3f89092bd",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n",
|
|
"Nombre de ciblage : 124302\n",
|
|
"Nombre de client avec étiquette target : 62915\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_name</th>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th>cumulative_customers</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>50</th>\n",
|
|
" <td>Temp - DOUBLE OPTIN</td>\n",
|
|
" <td>0.410983</td>\n",
|
|
" <td>0.208018</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>Nombre de représentations = 1</td>\n",
|
|
" <td>0.330128</td>\n",
|
|
" <td>0.375111</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>65</th>\n",
|
|
" <td>primo-spectateurs (fidélité = 1)</td>\n",
|
|
" <td>0.247811</td>\n",
|
|
" <td>0.500539</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Acheteurs réguliers (fidélité >= 2)</td>\n",
|
|
" <td>0.126202</td>\n",
|
|
" <td>0.564416</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>34</th>\n",
|
|
" <td>Nombre de représentations = ou > 4</td>\n",
|
|
" <td>0.085290</td>\n",
|
|
" <td>0.607585</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Brochure janvier-juin 2023</td>\n",
|
|
" <td>0.076929</td>\n",
|
|
" <td>0.646522</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>NEWSLETTER</td>\n",
|
|
" <td>0.074370</td>\n",
|
|
" <td>0.684164</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Demande brochure sept-déc 23</td>\n",
|
|
" <td>0.071541</td>\n",
|
|
" <td>0.720375</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>Demande brochure sept-déc 23 DEF</td>\n",
|
|
" <td>0.071461</td>\n",
|
|
" <td>0.756545</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32</th>\n",
|
|
" <td>Nombre de représentations = 2</td>\n",
|
|
" <td>0.071016</td>\n",
|
|
" <td>0.792489</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>52</th>\n",
|
|
" <td>Waterproof_2023</td>\n",
|
|
" <td>0.065326</td>\n",
|
|
" <td>0.825554</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>46</th>\n",
|
|
" <td>Relance Poppée 08/09/23</td>\n",
|
|
" <td>0.057077</td>\n",
|
|
" <td>0.854443</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>28</th>\n",
|
|
" <td>Luisa Miller_ciblé</td>\n",
|
|
" <td>0.052404</td>\n",
|
|
" <td>0.880967</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>En dernier lieu</td>\n",
|
|
" <td>0.036287</td>\n",
|
|
" <td>0.899334</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Bilan Carmen danse</td>\n",
|
|
" <td>0.030660</td>\n",
|
|
" <td>0.914853</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>33</th>\n",
|
|
" <td>Nombre de représentations = 3</td>\n",
|
|
" <td>0.029023</td>\n",
|
|
" <td>0.929543</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>Inscription Newsletter</td>\n",
|
|
" <td>0.019026</td>\n",
|
|
" <td>0.939172</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Code postal 56</td>\n",
|
|
" <td>0.015481</td>\n",
|
|
" <td>0.947008</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>Les nuits d'été - mail ciblé 13/10/23</td>\n",
|
|
" <td>0.011825</td>\n",
|
|
" <td>0.952994</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>42</th>\n",
|
|
" <td>Poppée 5, 7 et 8 octobre 23</td>\n",
|
|
" <td>0.008901</td>\n",
|
|
" <td>0.957499</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>43</th>\n",
|
|
" <td>Promo musique du monde Mawâl de la terre</td>\n",
|
|
" <td>0.008122</td>\n",
|
|
" <td>0.961610</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>38</th>\n",
|
|
" <td>PRESSE NATIONALE</td>\n",
|
|
" <td>0.006978</td>\n",
|
|
" <td>0.965141</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>L'Élixir d'amour 11, 13 mai</td>\n",
|
|
" <td>0.006390</td>\n",
|
|
" <td>0.968375</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>Florilège mail ciblé</td>\n",
|
|
" <td>0.006390</td>\n",
|
|
" <td>0.971609</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>Les Nuits d'été - avant spectacle</td>\n",
|
|
" <td>0.006056</td>\n",
|
|
" <td>0.974675</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>Enquête_Bal de Paris</td>\n",
|
|
" <td>0.005786</td>\n",
|
|
" <td>0.977603</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>Luisa Miller 23 mars</td>\n",
|
|
" <td>0.004339</td>\n",
|
|
" <td>0.979799</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>Les nuits d'été ajout - mail ciblé 13/10/23</td>\n",
|
|
" <td>0.003465</td>\n",
|
|
" <td>0.981553</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>GRANDS EVENEMENTS</td>\n",
|
|
" <td>0.003386</td>\n",
|
|
" <td>0.983267</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>41</th>\n",
|
|
" <td>Poppée 3 octobre 23</td>\n",
|
|
" <td>0.002416</td>\n",
|
|
" <td>0.984489</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>62</th>\n",
|
|
" <td>liste mécénat et prospect</td>\n",
|
|
" <td>0.002273</td>\n",
|
|
" <td>0.985640</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>40</th>\n",
|
|
" <td>Poppée 1 octobre 23 uniquement</td>\n",
|
|
" <td>0.002162</td>\n",
|
|
" <td>0.986734</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>L'Élixir d'amour 5, 7, 9 mai</td>\n",
|
|
" <td>0.002098</td>\n",
|
|
" <td>0.987796</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>37</th>\n",
|
|
" <td>PRESSE LOCALE</td>\n",
|
|
" <td>0.001891</td>\n",
|
|
" <td>0.988753</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>26</th>\n",
|
|
" <td>Luisa Miller 23 et 25 mars</td>\n",
|
|
" <td>0.001764</td>\n",
|
|
" <td>0.989646</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>59</th>\n",
|
|
" <td>liste invites grand boum</td>\n",
|
|
" <td>0.001637</td>\n",
|
|
" <td>0.990475</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>INVITS PREMIERES</td>\n",
|
|
" <td>0.001494</td>\n",
|
|
" <td>0.991231</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>Luisa Miller 19 et 21 mars</td>\n",
|
|
" <td>0.001446</td>\n",
|
|
" <td>0.991963</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>Invités TNB</td>\n",
|
|
" <td>0.001208</td>\n",
|
|
" <td>0.992575</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>44</th>\n",
|
|
" <td>Protocole : REGIONAUX</td>\n",
|
|
" <td>0.001160</td>\n",
|
|
" <td>0.993162</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>54</th>\n",
|
|
" <td>assos danse</td>\n",
|
|
" <td>0.001160</td>\n",
|
|
" <td>0.993749</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>45</th>\n",
|
|
" <td>Protocole Objectif Choeurs</td>\n",
|
|
" <td>0.001160</td>\n",
|
|
" <td>0.994336</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>51</th>\n",
|
|
" <td>Titulaires cartes Opéra</td>\n",
|
|
" <td>0.000985</td>\n",
|
|
" <td>0.994835</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Ecoles élémentaires Rennes</td>\n",
|
|
" <td>0.000954</td>\n",
|
|
" <td>0.995318</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>53</th>\n",
|
|
" <td>Zaïde 10 et 12 février</td>\n",
|
|
" <td>0.000827</td>\n",
|
|
" <td>0.995736</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>60</th>\n",
|
|
" <td>liste invités soirée 11 septembre mba</td>\n",
|
|
" <td>0.000795</td>\n",
|
|
" <td>0.996138</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>Info Tutelle</td>\n",
|
|
" <td>0.000795</td>\n",
|
|
" <td>0.996541</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>61</th>\n",
|
|
" <td>liste invités soirée 11 septembre mba 2</td>\n",
|
|
" <td>0.000763</td>\n",
|
|
" <td>0.996927</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>Hira Gasy Spectateurs</td>\n",
|
|
" <td>0.000731</td>\n",
|
|
" <td>0.997297</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>48</th>\n",
|
|
" <td>Spectateurs Passion selon Brockes</td>\n",
|
|
" <td>0.000636</td>\n",
|
|
" <td>0.997619</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>47</th>\n",
|
|
" <td>Spectateurs Oratorios pour Passion</td>\n",
|
|
" <td>0.000525</td>\n",
|
|
" <td>0.997884</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>36</th>\n",
|
|
" <td>PERSONNEL OPERA</td>\n",
|
|
" <td>0.000493</td>\n",
|
|
" <td>0.998134</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>57</th>\n",
|
|
" <td>invités représentation du 1er octobre2023</td>\n",
|
|
" <td>0.000445</td>\n",
|
|
" <td>0.998359</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>64</th>\n",
|
|
" <td>mécène - vernissage frac</td>\n",
|
|
" <td>0.000429</td>\n",
|
|
" <td>0.998576</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>INVITS CONF PRESSE ETE 2020</td>\n",
|
|
" <td>0.000381</td>\n",
|
|
" <td>0.998769</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>Elixir d'amour 15/04 14h30</td>\n",
|
|
" <td>0.000350</td>\n",
|
|
" <td>0.998946</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>Elixir d'amour 15/04 17h30</td>\n",
|
|
" <td>0.000350</td>\n",
|
|
" <td>0.999123</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>39</th>\n",
|
|
" <td>PRESSE NUMERIQUE</td>\n",
|
|
" <td>0.000334</td>\n",
|
|
" <td>0.999292</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>55</th>\n",
|
|
" <td>invitations représentation 3 octobre2023</td>\n",
|
|
" <td>0.000334</td>\n",
|
|
" <td>0.999461</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>58</th>\n",
|
|
" <td>liste chargé.e.s de communication</td>\n",
|
|
" <td>0.000334</td>\n",
|
|
" <td>0.999630</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>MECENES</td>\n",
|
|
" <td>0.000207</td>\n",
|
|
" <td>0.999735</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>56</th>\n",
|
|
" <td>invités représentation 1er octobre2023</td>\n",
|
|
" <td>0.000207</td>\n",
|
|
" <td>0.999839</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>35</th>\n",
|
|
" <td>OPERA</td>\n",
|
|
" <td>0.000159</td>\n",
|
|
" <td>0.999920</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>63</th>\n",
|
|
" <td>liste mécénat et prospect 2 - erreur mail</td>\n",
|
|
" <td>0.000064</td>\n",
|
|
" <td>0.999952</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>49</th>\n",
|
|
" <td>TEST ENVOI</td>\n",
|
|
" <td>0.000064</td>\n",
|
|
" <td>0.999984</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Choeur ouvert 22-23</td>\n",
|
|
" <td>0.000032</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_name customer_id \\\n",
|
|
"50 Temp - DOUBLE OPTIN 0.410983 \n",
|
|
"31 Nombre de représentations = 1 0.330128 \n",
|
|
"65 primo-spectateurs (fidélité = 1) 0.247811 \n",
|
|
"0 Acheteurs réguliers (fidélité >= 2) 0.126202 \n",
|
|
"34 Nombre de représentations = ou > 4 0.085290 \n",
|
|
"2 Brochure janvier-juin 2023 0.076929 \n",
|
|
"30 NEWSLETTER 0.074370 \n",
|
|
"5 Demande brochure sept-déc 23 0.071541 \n",
|
|
"6 Demande brochure sept-déc 23 DEF 0.071461 \n",
|
|
"32 Nombre de représentations = 2 0.071016 \n",
|
|
"52 Waterproof_2023 0.065326 \n",
|
|
"46 Relance Poppée 08/09/23 0.057077 \n",
|
|
"28 Luisa Miller_ciblé 0.052404 \n",
|
|
"10 En dernier lieu 0.036287 \n",
|
|
"1 Bilan Carmen danse 0.030660 \n",
|
|
"33 Nombre de représentations = 3 0.029023 \n",
|
|
"18 Inscription Newsletter 0.019026 \n",
|
|
"4 Code postal 56 0.015481 \n",
|
|
"23 Les nuits d'été - mail ciblé 13/10/23 0.011825 \n",
|
|
"42 Poppée 5, 7 et 8 octobre 23 0.008901 \n",
|
|
"43 Promo musique du monde Mawâl de la terre 0.008122 \n",
|
|
"38 PRESSE NATIONALE 0.006978 \n",
|
|
"20 L'Élixir d'amour 11, 13 mai 0.006390 \n",
|
|
"12 Florilège mail ciblé 0.006390 \n",
|
|
"22 Les Nuits d'été - avant spectacle 0.006056 \n",
|
|
"11 Enquête_Bal de Paris 0.005786 \n",
|
|
"27 Luisa Miller 23 mars 0.004339 \n",
|
|
"24 Les nuits d'été ajout - mail ciblé 13/10/23 0.003465 \n",
|
|
"13 GRANDS EVENEMENTS 0.003386 \n",
|
|
"41 Poppée 3 octobre 23 0.002416 \n",
|
|
"62 liste mécénat et prospect 0.002273 \n",
|
|
"40 Poppée 1 octobre 23 uniquement 0.002162 \n",
|
|
"21 L'Élixir d'amour 5, 7, 9 mai 0.002098 \n",
|
|
"37 PRESSE LOCALE 0.001891 \n",
|
|
"26 Luisa Miller 23 et 25 mars 0.001764 \n",
|
|
"59 liste invites grand boum 0.001637 \n",
|
|
"16 INVITS PREMIERES 0.001494 \n",
|
|
"25 Luisa Miller 19 et 21 mars 0.001446 \n",
|
|
"19 Invités TNB 0.001208 \n",
|
|
"44 Protocole : REGIONAUX 0.001160 \n",
|
|
"54 assos danse 0.001160 \n",
|
|
"45 Protocole Objectif Choeurs 0.001160 \n",
|
|
"51 Titulaires cartes Opéra 0.000985 \n",
|
|
"7 Ecoles élémentaires Rennes 0.000954 \n",
|
|
"53 Zaïde 10 et 12 février 0.000827 \n",
|
|
"60 liste invités soirée 11 septembre mba 0.000795 \n",
|
|
"17 Info Tutelle 0.000795 \n",
|
|
"61 liste invités soirée 11 septembre mba 2 0.000763 \n",
|
|
"14 Hira Gasy Spectateurs 0.000731 \n",
|
|
"48 Spectateurs Passion selon Brockes 0.000636 \n",
|
|
"47 Spectateurs Oratorios pour Passion 0.000525 \n",
|
|
"36 PERSONNEL OPERA 0.000493 \n",
|
|
"57 invités représentation du 1er octobre2023 0.000445 \n",
|
|
"64 mécène - vernissage frac 0.000429 \n",
|
|
"15 INVITS CONF PRESSE ETE 2020 0.000381 \n",
|
|
"8 Elixir d'amour 15/04 14h30 0.000350 \n",
|
|
"9 Elixir d'amour 15/04 17h30 0.000350 \n",
|
|
"39 PRESSE NUMERIQUE 0.000334 \n",
|
|
"55 invitations représentation 3 octobre2023 0.000334 \n",
|
|
"58 liste chargé.e.s de communication 0.000334 \n",
|
|
"29 MECENES 0.000207 \n",
|
|
"56 invités représentation 1er octobre2023 0.000207 \n",
|
|
"35 OPERA 0.000159 \n",
|
|
"63 liste mécénat et prospect 2 - erreur mail 0.000064 \n",
|
|
"49 TEST ENVOI 0.000064 \n",
|
|
"3 Choeur ouvert 22-23 0.000032 \n",
|
|
"\n",
|
|
" cumulative_customers \n",
|
|
"50 0.208018 \n",
|
|
"31 0.375111 \n",
|
|
"65 0.500539 \n",
|
|
"0 0.564416 \n",
|
|
"34 0.607585 \n",
|
|
"2 0.646522 \n",
|
|
"30 0.684164 \n",
|
|
"5 0.720375 \n",
|
|
"6 0.756545 \n",
|
|
"32 0.792489 \n",
|
|
"52 0.825554 \n",
|
|
"46 0.854443 \n",
|
|
"28 0.880967 \n",
|
|
"10 0.899334 \n",
|
|
"1 0.914853 \n",
|
|
"33 0.929543 \n",
|
|
"18 0.939172 \n",
|
|
"4 0.947008 \n",
|
|
"23 0.952994 \n",
|
|
"42 0.957499 \n",
|
|
"43 0.961610 \n",
|
|
"38 0.965141 \n",
|
|
"20 0.968375 \n",
|
|
"12 0.971609 \n",
|
|
"22 0.974675 \n",
|
|
"11 0.977603 \n",
|
|
"27 0.979799 \n",
|
|
"24 0.981553 \n",
|
|
"13 0.983267 \n",
|
|
"41 0.984489 \n",
|
|
"62 0.985640 \n",
|
|
"40 0.986734 \n",
|
|
"21 0.987796 \n",
|
|
"37 0.988753 \n",
|
|
"26 0.989646 \n",
|
|
"59 0.990475 \n",
|
|
"16 0.991231 \n",
|
|
"25 0.991963 \n",
|
|
"19 0.992575 \n",
|
|
"44 0.993162 \n",
|
|
"54 0.993749 \n",
|
|
"45 0.994336 \n",
|
|
"51 0.994835 \n",
|
|
"7 0.995318 \n",
|
|
"53 0.995736 \n",
|
|
"60 0.996138 \n",
|
|
"17 0.996541 \n",
|
|
"61 0.996927 \n",
|
|
"14 0.997297 \n",
|
|
"48 0.997619 \n",
|
|
"47 0.997884 \n",
|
|
"36 0.998134 \n",
|
|
"57 0.998359 \n",
|
|
"64 0.998576 \n",
|
|
"15 0.998769 \n",
|
|
"8 0.998946 \n",
|
|
"9 0.999123 \n",
|
|
"39 0.999292 \n",
|
|
"55 0.999461 \n",
|
|
"58 0.999630 \n",
|
|
"29 0.999735 \n",
|
|
"56 0.999839 \n",
|
|
"35 0.999920 \n",
|
|
"63 0.999952 \n",
|
|
"49 0.999984 \n",
|
|
"3 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"print_main_target('11', 100)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "603b11e4-5d76-4699-a1b2-e795929edc04",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n",
|
|
"Nombre de ciblage : 1409140\n",
|
|
"Nombre de client avec étiquette target : 242726\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_name</th>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th>cumulative_customers</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>45</th>\n",
|
|
" <td>CAT/buit all</td>\n",
|
|
" <td>0.755387</td>\n",
|
|
" <td>0.130116</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>274</th>\n",
|
|
" <td>lista bbdd opt-in</td>\n",
|
|
" <td>0.510559</td>\n",
|
|
" <td>0.218061</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>157</th>\n",
|
|
" <td>Obren mails</td>\n",
|
|
" <td>0.398466</td>\n",
|
|
" <td>0.286697</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>165</th>\n",
|
|
" <td>Participantes por primera vez</td>\n",
|
|
" <td>0.376012</td>\n",
|
|
" <td>0.351465</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>267</th>\n",
|
|
" <td>buit all</td>\n",
|
|
" <td>0.355907</td>\n",
|
|
" <td>0.412771</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>158</th>\n",
|
|
" <td>Obren mails CAT</td>\n",
|
|
" <td>0.302773</td>\n",
|
|
" <td>0.464924</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>94</th>\n",
|
|
" <td>ES all</td>\n",
|
|
" <td>0.218934</td>\n",
|
|
" <td>0.502636</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>105</th>\n",
|
|
" <td>Festivals 19-21-22-23 CAT</td>\n",
|
|
" <td>0.213348</td>\n",
|
|
" <td>0.539385</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>51</th>\n",
|
|
" <td>Comptes cashless - tots</td>\n",
|
|
" <td>0.176883</td>\n",
|
|
" <td>0.569853</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>265</th>\n",
|
|
" <td>allcomptespersonalsambnom</td>\n",
|
|
" <td>0.176883</td>\n",
|
|
" <td>0.600321</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>264</th>\n",
|
|
" <td>allcomptespersonals</td>\n",
|
|
" <td>0.176878</td>\n",
|
|
" <td>0.630789</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>268</th>\n",
|
|
" <td>compradores habituales</td>\n",
|
|
" <td>0.155327</td>\n",
|
|
" <td>0.657544</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>49</th>\n",
|
|
" <td>Compradors CE2023</td>\n",
|
|
" <td>0.135478</td>\n",
|
|
" <td>0.680881</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>All CE2023 CAT</td>\n",
|
|
" <td>0.116329</td>\n",
|
|
" <td>0.700918</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>160</th>\n",
|
|
" <td>Obren mails ES</td>\n",
|
|
" <td>0.089224</td>\n",
|
|
" <td>0.716287</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>263</th>\n",
|
|
" <td>all unsubscribed 09.03.23</td>\n",
|
|
" <td>0.087766</td>\n",
|
|
" <td>0.731405</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>All unsubscribed 09.03.23</td>\n",
|
|
" <td>0.087766</td>\n",
|
|
" <td>0.746523</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>106</th>\n",
|
|
" <td>Festivals 19-21-22-23 ES</td>\n",
|
|
" <td>0.077594</td>\n",
|
|
" <td>0.759888</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>273</th>\n",
|
|
" <td>lista bbdd opt out</td>\n",
|
|
" <td>0.076214</td>\n",
|
|
" <td>0.773016</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>171</th>\n",
|
|
" <td>Primer control de acceso 07/07/23</td>\n",
|
|
" <td>0.074710</td>\n",
|
|
" <td>0.785885</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>All CE2023 ES</td>\n",
|
|
" <td>0.063673</td>\n",
|
|
" <td>0.796853</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>181</th>\n",
|
|
" <td>Push Joan Miquel Oliver CAT</td>\n",
|
|
" <td>0.056174</td>\n",
|
|
" <td>0.806529</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>200</th>\n",
|
|
" <td>Segments Joan Miquel Oliver AX</td>\n",
|
|
" <td>0.049978</td>\n",
|
|
" <td>0.815138</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>204</th>\n",
|
|
" <td>Segments jajas</td>\n",
|
|
" <td>0.039996</td>\n",
|
|
" <td>0.822027</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>199</th>\n",
|
|
" <td>Segments Fatoumata PowerBI</td>\n",
|
|
" <td>0.037071</td>\n",
|
|
" <td>0.828412</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>278</th>\n",
|
|
" <td>segments fatoumata powerbi</td>\n",
|
|
" <td>0.037071</td>\n",
|
|
" <td>0.834798</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>88</th>\n",
|
|
" <td>Divendres CE2023</td>\n",
|
|
" <td>0.036547</td>\n",
|
|
" <td>0.841093</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>205</th>\n",
|
|
" <td>Segments jajas CAT</td>\n",
|
|
" <td>0.032349</td>\n",
|
|
" <td>0.846665</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>73</th>\n",
|
|
" <td>Dijous CE2023</td>\n",
|
|
" <td>0.028448</td>\n",
|
|
" <td>0.851565</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>83</th>\n",
|
|
" <td>Dissabte CE2023</td>\n",
|
|
" <td>0.028168</td>\n",
|
|
" <td>0.856417</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>201</th>\n",
|
|
" <td>Segments Joan Miquel Oliver PowerBI</td>\n",
|
|
" <td>0.027426</td>\n",
|
|
" <td>0.861142</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>279</th>\n",
|
|
" <td>segments joan miquel oliver powerbi</td>\n",
|
|
" <td>0.027426</td>\n",
|
|
" <td>0.865866</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>178</th>\n",
|
|
" <td>Push Fatoumata CAT</td>\n",
|
|
" <td>0.027253</td>\n",
|
|
" <td>0.870560</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>93</th>\n",
|
|
" <td>EN all</td>\n",
|
|
" <td>0.025675</td>\n",
|
|
" <td>0.874983</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>203</th>\n",
|
|
" <td>Segments Pinpilincinos PowerBI</td>\n",
|
|
" <td>0.020851</td>\n",
|
|
" <td>0.878574</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>280</th>\n",
|
|
" <td>segments pinpilincinos powerbi</td>\n",
|
|
" <td>0.020851</td>\n",
|
|
" <td>0.882166</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>78</th>\n",
|
|
" <td>Dimecres CE2023</td>\n",
|
|
" <td>0.020702</td>\n",
|
|
" <td>0.885732</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>89</th>\n",
|
|
" <td>Divendres CE2023 CAT</td>\n",
|
|
" <td>0.020463</td>\n",
|
|
" <td>0.889257</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>All CE2023 buit</td>\n",
|
|
" <td>0.020002</td>\n",
|
|
" <td>0.892702</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>276</th>\n",
|
|
" <td>regalentradesrondes</td>\n",
|
|
" <td>0.019458</td>\n",
|
|
" <td>0.896054</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>41</th>\n",
|
|
" <td>Assistents concerts last year</td>\n",
|
|
" <td>0.019458</td>\n",
|
|
" <td>0.899405</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>183</th>\n",
|
|
" <td>Push Pinpilincinos CAT</td>\n",
|
|
" <td>0.018708</td>\n",
|
|
" <td>0.902628</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>266</th>\n",
|
|
" <td>barres comedy festival</td>\n",
|
|
" <td>0.018066</td>\n",
|
|
" <td>0.905740</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>44</th>\n",
|
|
" <td>Barres Comedy festival</td>\n",
|
|
" <td>0.018066</td>\n",
|
|
" <td>0.908851</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>270</th>\n",
|
|
" <td>in risus</td>\n",
|
|
" <td>0.018033</td>\n",
|
|
" <td>0.911958</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50</th>\n",
|
|
" <td>Compradors In Risus</td>\n",
|
|
" <td>0.018033</td>\n",
|
|
" <td>0.915064</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>182</th>\n",
|
|
" <td>Push Joan Miquel Oliver ES</td>\n",
|
|
" <td>0.017205</td>\n",
|
|
" <td>0.918027</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>All abo wkd-2dies CE2023</td>\n",
|
|
" <td>0.017097</td>\n",
|
|
" <td>0.920972</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>84</th>\n",
|
|
" <td>Dissabte CE2023 CAT</td>\n",
|
|
" <td>0.016080</td>\n",
|
|
" <td>0.923742</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>42</th>\n",
|
|
" <td>Assistents concerts last year CAT</td>\n",
|
|
" <td>0.015878</td>\n",
|
|
" <td>0.926477</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>Abonament Weekend ST</td>\n",
|
|
" <td>0.015400</td>\n",
|
|
" <td>0.929130</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>176</th>\n",
|
|
" <td>Push Al·lèrgiques CAT</td>\n",
|
|
" <td>0.014642</td>\n",
|
|
" <td>0.931652</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>74</th>\n",
|
|
" <td>Dijous CE2023 CAT</td>\n",
|
|
" <td>0.014139</td>\n",
|
|
" <td>0.934087</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>196</th>\n",
|
|
" <td>Segments Al·lèrgiques AX</td>\n",
|
|
" <td>0.014041</td>\n",
|
|
" <td>0.936506</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>180</th>\n",
|
|
" <td>Push Fatoumata ES</td>\n",
|
|
" <td>0.012158</td>\n",
|
|
" <td>0.938600</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>79</th>\n",
|
|
" <td>Dimecres CE2023 CAT</td>\n",
|
|
" <td>0.010287</td>\n",
|
|
" <td>0.940372</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>All abo wkd-2dies CE2023 CAT</td>\n",
|
|
" <td>0.009888</td>\n",
|
|
" <td>0.942075</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>91</th>\n",
|
|
" <td>Divendres CE2023 ES</td>\n",
|
|
" <td>0.009500</td>\n",
|
|
" <td>0.943712</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>112</th>\n",
|
|
" <td>Funzo & Baby Loud CT2023</td>\n",
|
|
" <td>0.009064</td>\n",
|
|
" <td>0.945273</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>All CE2023 EN</td>\n",
|
|
" <td>0.008232</td>\n",
|
|
" <td>0.946691</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>76</th>\n",
|
|
" <td>Dijous CE2023 ES</td>\n",
|
|
" <td>0.008001</td>\n",
|
|
" <td>0.948069</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>All abo full-4dies CE2023</td>\n",
|
|
" <td>0.007980</td>\n",
|
|
" <td>0.949444</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>184</th>\n",
|
|
" <td>Push Pinpilincinos ES</td>\n",
|
|
" <td>0.007535</td>\n",
|
|
" <td>0.950742</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>206</th>\n",
|
|
" <td>Segments jajas ES</td>\n",
|
|
" <td>0.007412</td>\n",
|
|
" <td>0.952018</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>115</th>\n",
|
|
" <td>Funzo & Baby Loud CT2023 buit</td>\n",
|
|
" <td>0.007321</td>\n",
|
|
" <td>0.953279</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>236</th>\n",
|
|
" <td>The Tyets CT2024 16.11</td>\n",
|
|
" <td>0.006798</td>\n",
|
|
" <td>0.954450</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>86</th>\n",
|
|
" <td>Dissabte CE2023 ES</td>\n",
|
|
" <td>0.006691</td>\n",
|
|
" <td>0.955603</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Abonament Full ST</td>\n",
|
|
" <td>0.006674</td>\n",
|
|
" <td>0.956752</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>159</th>\n",
|
|
" <td>Obren mails EN</td>\n",
|
|
" <td>0.006468</td>\n",
|
|
" <td>0.957867</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>31 FAM CT2023</td>\n",
|
|
" <td>0.006468</td>\n",
|
|
" <td>0.958981</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>202</th>\n",
|
|
" <td>Segments Pinpilincinos AX</td>\n",
|
|
" <td>0.006279</td>\n",
|
|
" <td>0.960062</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>211</th>\n",
|
|
" <td>Sen Senra CH2024</td>\n",
|
|
" <td>0.006007</td>\n",
|
|
" <td>0.961097</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>81</th>\n",
|
|
" <td>Dimecres CE2023 ES</td>\n",
|
|
" <td>0.005735</td>\n",
|
|
" <td>0.962085</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>All abo full-4dies CE2023 CAT</td>\n",
|
|
" <td>0.005451</td>\n",
|
|
" <td>0.963024</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>239</th>\n",
|
|
" <td>The Tyets CT2024 16.11 buit</td>\n",
|
|
" <td>0.005414</td>\n",
|
|
" <td>0.963956</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>77</th>\n",
|
|
" <td>Dijous CE2023 buit</td>\n",
|
|
" <td>0.005257</td>\n",
|
|
" <td>0.964862</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>193</th>\n",
|
|
" <td>SF 7 anys o més</td>\n",
|
|
" <td>0.005203</td>\n",
|
|
" <td>0.965758</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>281</th>\n",
|
|
" <td>sf7anysomes</td>\n",
|
|
" <td>0.005203</td>\n",
|
|
" <td>0.966654</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>All abo wkd-2dies CE2023 ES</td>\n",
|
|
" <td>0.005034</td>\n",
|
|
" <td>0.967521</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>197</th>\n",
|
|
" <td>Segments Al·lèrgiques ST</td>\n",
|
|
" <td>0.004866</td>\n",
|
|
" <td>0.968359</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>277</th>\n",
|
|
" <td>segments al·lèrgiques st</td>\n",
|
|
" <td>0.004866</td>\n",
|
|
" <td>0.969198</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>260</th>\n",
|
|
" <td>XXS Comedy</td>\n",
|
|
" <td>0.004837</td>\n",
|
|
" <td>0.970031</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>92</th>\n",
|
|
" <td>Divendres CE2023 buit</td>\n",
|
|
" <td>0.004828</td>\n",
|
|
" <td>0.970862</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>87</th>\n",
|
|
" <td>Dissabte CE2023 buit</td>\n",
|
|
" <td>0.004614</td>\n",
|
|
" <td>0.971657</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>215</th>\n",
|
|
" <td>Sen Senra CH2024 buit</td>\n",
|
|
" <td>0.004511</td>\n",
|
|
" <td>0.972434</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82</th>\n",
|
|
" <td>Dimecres CE2023 buit</td>\n",
|
|
" <td>0.004421</td>\n",
|
|
" <td>0.973196</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>Abo 3 dies CE2023</td>\n",
|
|
" <td>0.004272</td>\n",
|
|
" <td>0.973932</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>198</th>\n",
|
|
" <td>Segments Fatoumata AX</td>\n",
|
|
" <td>0.004219</td>\n",
|
|
" <td>0.974658</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>66</th>\n",
|
|
" <td>Convidats pro</td>\n",
|
|
" <td>0.003980</td>\n",
|
|
" <td>0.975344</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>269</th>\n",
|
|
" <td>identified_contacts</td>\n",
|
|
" <td>0.003976</td>\n",
|
|
" <td>0.976029</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>177</th>\n",
|
|
" <td>Push Al·lèrgiques ES</td>\n",
|
|
" <td>0.003885</td>\n",
|
|
" <td>0.976698</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>254</th>\n",
|
|
" <td>Viva Suecia CH2024</td>\n",
|
|
" <td>0.003770</td>\n",
|
|
" <td>0.977347</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>220</th>\n",
|
|
" <td>Sidonie CH2024</td>\n",
|
|
" <td>0.003683</td>\n",
|
|
" <td>0.977982</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>194</th>\n",
|
|
" <td>SF 7 anys o més CAT</td>\n",
|
|
" <td>0.003531</td>\n",
|
|
" <td>0.978590</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>43</th>\n",
|
|
" <td>Assistents concerts last year ES</td>\n",
|
|
" <td>0.003518</td>\n",
|
|
" <td>0.979196</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>232</th>\n",
|
|
" <td>The Tyets CT2024 15.11</td>\n",
|
|
" <td>0.003502</td>\n",
|
|
" <td>0.979799</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>283</th>\n",
|
|
" <td>usuaris cruïlla green day</td>\n",
|
|
" <td>0.003242</td>\n",
|
|
" <td>0.980358</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>240</th>\n",
|
|
" <td>Usuaris Cruïlla Green Day</td>\n",
|
|
" <td>0.003242</td>\n",
|
|
" <td>0.980916</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>31 FAM CT2023 CAT</td>\n",
|
|
" <td>0.003176</td>\n",
|
|
" <td>0.981463</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>68</th>\n",
|
|
" <td>Cupido CH2024</td>\n",
|
|
" <td>0.003148</td>\n",
|
|
" <td>0.982005</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_name customer_id cumulative_customers\n",
|
|
"45 CAT/buit all 0.755387 0.130116\n",
|
|
"274 lista bbdd opt-in 0.510559 0.218061\n",
|
|
"157 Obren mails 0.398466 0.286697\n",
|
|
"165 Participantes por primera vez 0.376012 0.351465\n",
|
|
"267 buit all 0.355907 0.412771\n",
|
|
"158 Obren mails CAT 0.302773 0.464924\n",
|
|
"94 ES all 0.218934 0.502636\n",
|
|
"105 Festivals 19-21-22-23 CAT 0.213348 0.539385\n",
|
|
"51 Comptes cashless - tots 0.176883 0.569853\n",
|
|
"265 allcomptespersonalsambnom 0.176883 0.600321\n",
|
|
"264 allcomptespersonals 0.176878 0.630789\n",
|
|
"268 compradores habituales 0.155327 0.657544\n",
|
|
"49 Compradors CE2023 0.135478 0.680881\n",
|
|
"16 All CE2023 CAT 0.116329 0.700918\n",
|
|
"160 Obren mails ES 0.089224 0.716287\n",
|
|
"263 all unsubscribed 09.03.23 0.087766 0.731405\n",
|
|
"27 All unsubscribed 09.03.23 0.087766 0.746523\n",
|
|
"106 Festivals 19-21-22-23 ES 0.077594 0.759888\n",
|
|
"273 lista bbdd opt out 0.076214 0.773016\n",
|
|
"171 Primer control de acceso 07/07/23 0.074710 0.785885\n",
|
|
"18 All CE2023 ES 0.063673 0.796853\n",
|
|
"181 Push Joan Miquel Oliver CAT 0.056174 0.806529\n",
|
|
"200 Segments Joan Miquel Oliver AX 0.049978 0.815138\n",
|
|
"204 Segments jajas 0.039996 0.822027\n",
|
|
"199 Segments Fatoumata PowerBI 0.037071 0.828412\n",
|
|
"278 segments fatoumata powerbi 0.037071 0.834798\n",
|
|
"88 Divendres CE2023 0.036547 0.841093\n",
|
|
"205 Segments jajas CAT 0.032349 0.846665\n",
|
|
"73 Dijous CE2023 0.028448 0.851565\n",
|
|
"83 Dissabte CE2023 0.028168 0.856417\n",
|
|
"201 Segments Joan Miquel Oliver PowerBI 0.027426 0.861142\n",
|
|
"279 segments joan miquel oliver powerbi 0.027426 0.865866\n",
|
|
"178 Push Fatoumata CAT 0.027253 0.870560\n",
|
|
"93 EN all 0.025675 0.874983\n",
|
|
"203 Segments Pinpilincinos PowerBI 0.020851 0.878574\n",
|
|
"280 segments pinpilincinos powerbi 0.020851 0.882166\n",
|
|
"78 Dimecres CE2023 0.020702 0.885732\n",
|
|
"89 Divendres CE2023 CAT 0.020463 0.889257\n",
|
|
"19 All CE2023 buit 0.020002 0.892702\n",
|
|
"276 regalentradesrondes 0.019458 0.896054\n",
|
|
"41 Assistents concerts last year 0.019458 0.899405\n",
|
|
"183 Push Pinpilincinos CAT 0.018708 0.902628\n",
|
|
"266 barres comedy festival 0.018066 0.905740\n",
|
|
"44 Barres Comedy festival 0.018066 0.908851\n",
|
|
"270 in risus 0.018033 0.911958\n",
|
|
"50 Compradors In Risus 0.018033 0.915064\n",
|
|
"182 Push Joan Miquel Oliver ES 0.017205 0.918027\n",
|
|
"22 All abo wkd-2dies CE2023 0.017097 0.920972\n",
|
|
"84 Dissabte CE2023 CAT 0.016080 0.923742\n",
|
|
"42 Assistents concerts last year CAT 0.015878 0.926477\n",
|
|
"9 Abonament Weekend ST 0.015400 0.929130\n",
|
|
"176 Push Al·lèrgiques CAT 0.014642 0.931652\n",
|
|
"74 Dijous CE2023 CAT 0.014139 0.934087\n",
|
|
"196 Segments Al·lèrgiques AX 0.014041 0.936506\n",
|
|
"180 Push Fatoumata ES 0.012158 0.938600\n",
|
|
"79 Dimecres CE2023 CAT 0.010287 0.940372\n",
|
|
"23 All abo wkd-2dies CE2023 CAT 0.009888 0.942075\n",
|
|
"91 Divendres CE2023 ES 0.009500 0.943712\n",
|
|
"112 Funzo & Baby Loud CT2023 0.009064 0.945273\n",
|
|
"17 All CE2023 EN 0.008232 0.946691\n",
|
|
"76 Dijous CE2023 ES 0.008001 0.948069\n",
|
|
"20 All abo full-4dies CE2023 0.007980 0.949444\n",
|
|
"184 Push Pinpilincinos ES 0.007535 0.950742\n",
|
|
"206 Segments jajas ES 0.007412 0.952018\n",
|
|
"115 Funzo & Baby Loud CT2023 buit 0.007321 0.953279\n",
|
|
"236 The Tyets CT2024 16.11 0.006798 0.954450\n",
|
|
"86 Dissabte CE2023 ES 0.006691 0.955603\n",
|
|
"7 Abonament Full ST 0.006674 0.956752\n",
|
|
"159 Obren mails EN 0.006468 0.957867\n",
|
|
"0 31 FAM CT2023 0.006468 0.958981\n",
|
|
"202 Segments Pinpilincinos AX 0.006279 0.960062\n",
|
|
"211 Sen Senra CH2024 0.006007 0.961097\n",
|
|
"81 Dimecres CE2023 ES 0.005735 0.962085\n",
|
|
"21 All abo full-4dies CE2023 CAT 0.005451 0.963024\n",
|
|
"239 The Tyets CT2024 16.11 buit 0.005414 0.963956\n",
|
|
"77 Dijous CE2023 buit 0.005257 0.964862\n",
|
|
"193 SF 7 anys o més 0.005203 0.965758\n",
|
|
"281 sf7anysomes 0.005203 0.966654\n",
|
|
"25 All abo wkd-2dies CE2023 ES 0.005034 0.967521\n",
|
|
"197 Segments Al·lèrgiques ST 0.004866 0.968359\n",
|
|
"277 segments al·lèrgiques st 0.004866 0.969198\n",
|
|
"260 XXS Comedy 0.004837 0.970031\n",
|
|
"92 Divendres CE2023 buit 0.004828 0.970862\n",
|
|
"87 Dissabte CE2023 buit 0.004614 0.971657\n",
|
|
"215 Sen Senra CH2024 buit 0.004511 0.972434\n",
|
|
"82 Dimecres CE2023 buit 0.004421 0.973196\n",
|
|
"5 Abo 3 dies CE2023 0.004272 0.973932\n",
|
|
"198 Segments Fatoumata AX 0.004219 0.974658\n",
|
|
"66 Convidats pro 0.003980 0.975344\n",
|
|
"269 identified_contacts 0.003976 0.976029\n",
|
|
"177 Push Al·lèrgiques ES 0.003885 0.976698\n",
|
|
"254 Viva Suecia CH2024 0.003770 0.977347\n",
|
|
"220 Sidonie CH2024 0.003683 0.977982\n",
|
|
"194 SF 7 anys o més CAT 0.003531 0.978590\n",
|
|
"43 Assistents concerts last year ES 0.003518 0.979196\n",
|
|
"232 The Tyets CT2024 15.11 0.003502 0.979799\n",
|
|
"283 usuaris cruïlla green day 0.003242 0.980358\n",
|
|
"240 Usuaris Cruïlla Green Day 0.003242 0.980916\n",
|
|
"1 31 FAM CT2023 CAT 0.003176 0.981463\n",
|
|
"68 Cupido CH2024 0.003148 0.982005"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"print_main_target('12', 100)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "a115ebcf-4488-47f3-9d7e-75a1fca52f0f",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n",
|
|
"Nombre de ciblage : 779658\n",
|
|
"Nombre de client avec étiquette target : 240541\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>target_name</th>\n",
|
|
" <th>customer_id</th>\n",
|
|
" <th>cumulative_customers</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>BDS 17/18/19</td>\n",
|
|
" <td>0.371483</td>\n",
|
|
" <td>0.114611</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>28</th>\n",
|
|
" <td>Cible non-acheteurs franciliens</td>\n",
|
|
" <td>0.208871</td>\n",
|
|
" <td>0.179052</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>34</th>\n",
|
|
" <td>FHU 2018 - Acheteurs bds</td>\n",
|
|
" <td>0.184817</td>\n",
|
|
" <td>0.236071</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>BDS FHU18</td>\n",
|
|
" <td>0.184505</td>\n",
|
|
" <td>0.292995</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>BDS 2021</td>\n",
|
|
" <td>0.183229</td>\n",
|
|
" <td>0.349525</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>79</th>\n",
|
|
" <td>importer_huma</td>\n",
|
|
" <td>0.183229</td>\n",
|
|
" <td>0.406055</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>BDS FHU21</td>\n",
|
|
" <td>0.183224</td>\n",
|
|
" <td>0.462584</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>BCOM 17/18/19</td>\n",
|
|
" <td>0.162247</td>\n",
|
|
" <td>0.512640</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>BDS FHU23 VDéf</td>\n",
|
|
" <td>0.152298</td>\n",
|
|
" <td>0.559627</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>BDS 2022</td>\n",
|
|
" <td>0.139095</td>\n",
|
|
" <td>0.602541</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>Cible non-acheteurs provinciaux</td>\n",
|
|
" <td>0.136991</td>\n",
|
|
" <td>0.644806</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>BCOM 2023 - PASS 3 JOURS</td>\n",
|
|
" <td>0.106946</td>\n",
|
|
" <td>0.677801</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>55</th>\n",
|
|
" <td>PROSPECT CONTACT -35ANS</td>\n",
|
|
" <td>0.103483</td>\n",
|
|
" <td>0.709728</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>73</th>\n",
|
|
" <td>billets bds au 11/09</td>\n",
|
|
" <td>0.077604</td>\n",
|
|
" <td>0.733670</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>64</th>\n",
|
|
" <td>Pass 3J</td>\n",
|
|
" <td>0.070337</td>\n",
|
|
" <td>0.755371</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>BCOM FHU18</td>\n",
|
|
" <td>0.064214</td>\n",
|
|
" <td>0.775182</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>BCOM 2018</td>\n",
|
|
" <td>0.059470</td>\n",
|
|
" <td>0.793530</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>72</th>\n",
|
|
" <td>bds 2022 +50 ans</td>\n",
|
|
" <td>0.052519</td>\n",
|
|
" <td>0.809733</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>BCOM FHU19</td>\n",
|
|
" <td>0.045593</td>\n",
|
|
" <td>0.823800</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>36</th>\n",
|
|
" <td>Festivaliers WEB 2020 - plein tarif</td>\n",
|
|
" <td>0.045410</td>\n",
|
|
" <td>0.837810</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>CAMPING</td>\n",
|
|
" <td>0.041178</td>\n",
|
|
" <td>0.850514</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>41</th>\n",
|
|
" <td>Inscriptions newsletters (depuis 2019)</td>\n",
|
|
" <td>0.039598</td>\n",
|
|
" <td>0.862731</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>56</th>\n",
|
|
" <td>PROSPECT FHU NORMANDIE 2021 #1</td>\n",
|
|
" <td>0.038925</td>\n",
|
|
" <td>0.874740</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>Camping FHU22</td>\n",
|
|
" <td>0.033716</td>\n",
|
|
" <td>0.885142</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>ACTIVATION BDS 24/08</td>\n",
|
|
" <td>0.031862</td>\n",
|
|
" <td>0.894972</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75</th>\n",
|
|
" <td>data bcom 2023 - 15.06.23</td>\n",
|
|
" <td>0.026594</td>\n",
|
|
" <td>0.903177</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>CAMPING 2021 #1</td>\n",
|
|
" <td>0.022154</td>\n",
|
|
" <td>0.910012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>CAMPING 24/08</td>\n",
|
|
" <td>0.022150</td>\n",
|
|
" <td>0.916846</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>58</th>\n",
|
|
" <td>PROSPECT FHU NORMANDIE 2021 #3</td>\n",
|
|
" <td>0.020579</td>\n",
|
|
" <td>0.923195</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>54</th>\n",
|
|
" <td>PROSPECT ACHETEURS FHU23 POUR FHU NORMANDIE 2023</td>\n",
|
|
" <td>0.017901</td>\n",
|
|
" <td>0.928717</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>85</th>\n",
|
|
" <td>prospect acheteurs fhu23_pour_fhu rouen 2023</td>\n",
|
|
" <td>0.017901</td>\n",
|
|
" <td>0.934240</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>63</th>\n",
|
|
" <td>PROSPECT NORMANDIE</td>\n",
|
|
" <td>0.014118</td>\n",
|
|
" <td>0.938596</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>60</th>\n",
|
|
" <td>PROSPECT FHU NORMANDIE 2022 BDS</td>\n",
|
|
" <td>0.013777</td>\n",
|
|
" <td>0.942847</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>87</th>\n",
|
|
" <td>prospect bds fhu normandie</td>\n",
|
|
" <td>0.013777</td>\n",
|
|
" <td>0.947097</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>44</th>\n",
|
|
" <td>PARKING 2021 #1</td>\n",
|
|
" <td>0.012484</td>\n",
|
|
" <td>0.950949</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>45</th>\n",
|
|
" <td>PARKING 24/08</td>\n",
|
|
" <td>0.012081</td>\n",
|
|
" <td>0.954676</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>42</th>\n",
|
|
" <td>PARKING - PORTE B</td>\n",
|
|
" <td>0.011848</td>\n",
|
|
" <td>0.958332</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>43</th>\n",
|
|
" <td>PARKING - PORTE J</td>\n",
|
|
" <td>0.011736</td>\n",
|
|
" <td>0.961953</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>86</th>\n",
|
|
" <td>prospect bcom fhu normandie</td>\n",
|
|
" <td>0.010709</td>\n",
|
|
" <td>0.965257</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>59</th>\n",
|
|
" <td>PROSPECT FHU NORMANDIE 2022 BCOM</td>\n",
|
|
" <td>0.010709</td>\n",
|
|
" <td>0.968561</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50</th>\n",
|
|
" <td>PASS 3J - ENFANTS</td>\n",
|
|
" <td>0.009716</td>\n",
|
|
" <td>0.971558</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>49</th>\n",
|
|
" <td>PASS 3J - ADOS</td>\n",
|
|
" <td>0.008643</td>\n",
|
|
" <td>0.974225</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>BILLETS NUIT</td>\n",
|
|
" <td>0.007333</td>\n",
|
|
" <td>0.976487</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>51</th>\n",
|
|
" <td>PASS Culture 2023 au 11/09</td>\n",
|
|
" <td>0.007092</td>\n",
|
|
" <td>0.978675</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>84</th>\n",
|
|
" <td>pass culture 11/09</td>\n",
|
|
" <td>0.007092</td>\n",
|
|
" <td>0.980863</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>83</th>\n",
|
|
" <td>pass culture - pass 3j</td>\n",
|
|
" <td>0.007076</td>\n",
|
|
" <td>0.983046</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Ados FHU22</td>\n",
|
|
" <td>0.006718</td>\n",
|
|
" <td>0.985119</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>33</th>\n",
|
|
" <td>Enfants FHU22</td>\n",
|
|
" <td>0.006631</td>\n",
|
|
" <td>0.987165</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>46</th>\n",
|
|
" <td>PARKING CAMPEURS</td>\n",
|
|
" <td>0.004781</td>\n",
|
|
" <td>0.988640</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>39</th>\n",
|
|
" <td>INSCRIPTION NL VOYAGES HUMA</td>\n",
|
|
" <td>0.003625</td>\n",
|
|
" <td>0.989758</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>BILLET CAMPING-CAR</td>\n",
|
|
" <td>0.002594</td>\n",
|
|
" <td>0.990559</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>61</th>\n",
|
|
" <td>PROSPECT FORUM LOGEMENT 2022</td>\n",
|
|
" <td>0.002062</td>\n",
|
|
" <td>0.991195</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>Camping-car FHU22</td>\n",
|
|
" <td>0.001996</td>\n",
|
|
" <td>0.991811</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>26</th>\n",
|
|
" <td>Camping Zen FHU22</td>\n",
|
|
" <td>0.001596</td>\n",
|
|
" <td>0.992303</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>66</th>\n",
|
|
" <td>RESP. STANDS POUR FHUA2020</td>\n",
|
|
" <td>0.001559</td>\n",
|
|
" <td>0.992784</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>52</th>\n",
|
|
" <td>PRESSE 2021</td>\n",
|
|
" <td>0.001559</td>\n",
|
|
" <td>0.993265</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>70</th>\n",
|
|
" <td>accreditations presse_fhu23</td>\n",
|
|
" <td>0.001434</td>\n",
|
|
" <td>0.993707</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>ACCREDITES - FHU 23</td>\n",
|
|
" <td>0.001434</td>\n",
|
|
" <td>0.994150</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>74</th>\n",
|
|
" <td>billets enfants 2023_06_05</td>\n",
|
|
" <td>0.001239</td>\n",
|
|
" <td>0.994532</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>ACHETEURS TIPIS</td>\n",
|
|
" <td>0.001023</td>\n",
|
|
" <td>0.994848</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>BCOM FHU NORMANDIE 2021</td>\n",
|
|
" <td>0.001023</td>\n",
|
|
" <td>0.995163</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>88</th>\n",
|
|
" <td>prospect forum logement 2023</td>\n",
|
|
" <td>0.001014</td>\n",
|
|
" <td>0.995476</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>62</th>\n",
|
|
" <td>PROSPECT FORUM LOGEMENT 2023</td>\n",
|
|
" <td>0.001014</td>\n",
|
|
" <td>0.995789</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>38</th>\n",
|
|
" <td>INSCRIPTION FORUM LOGEMENT - 14/02</td>\n",
|
|
" <td>0.000840</td>\n",
|
|
" <td>0.996048</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>57</th>\n",
|
|
" <td>PROSPECT FHU NORMANDIE 2021 #2</td>\n",
|
|
" <td>0.000802</td>\n",
|
|
" <td>0.996296</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>CN - 38e CONGRÈS / 2021</td>\n",
|
|
" <td>0.000790</td>\n",
|
|
" <td>0.996540</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>65</th>\n",
|
|
" <td>RESP. STANDS PCF 2021</td>\n",
|
|
" <td>0.000765</td>\n",
|
|
" <td>0.996776</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>40</th>\n",
|
|
" <td>INSCRITS VISIO 14/01/22</td>\n",
|
|
" <td>0.000698</td>\n",
|
|
" <td>0.996991</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>CROISIÉRISTES 2021 / RIVAGES DU MONDE</td>\n",
|
|
" <td>0.000686</td>\n",
|
|
" <td>0.997203</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>DINER DE PRESSE 2020</td>\n",
|
|
" <td>0.000669</td>\n",
|
|
" <td>0.997409</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>71</th>\n",
|
|
" <td>acheteurs bcom fhu normandie 2022</td>\n",
|
|
" <td>0.000657</td>\n",
|
|
" <td>0.997612</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>ACHETEURS BCOM FHU NORMANDIE 2022</td>\n",
|
|
" <td>0.000657</td>\n",
|
|
" <td>0.997814</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>47</th>\n",
|
|
" <td>PARKING PMR - PORTE B</td>\n",
|
|
" <td>0.000636</td>\n",
|
|
" <td>0.998011</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>53</th>\n",
|
|
" <td>PROSPECT / TOURISTRA</td>\n",
|
|
" <td>0.000628</td>\n",
|
|
" <td>0.998204</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>48</th>\n",
|
|
" <td>PARKING PMR - PORTE J</td>\n",
|
|
" <td>0.000590</td>\n",
|
|
" <td>0.998386</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>78</th>\n",
|
|
" <td>fichier presse - journalistes2</td>\n",
|
|
" <td>0.000578</td>\n",
|
|
" <td>0.998565</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>35</th>\n",
|
|
" <td>FHU22 - DIFFUSION CP</td>\n",
|
|
" <td>0.000570</td>\n",
|
|
" <td>0.998740</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32</th>\n",
|
|
" <td>DINER DE PRESSE 2021</td>\n",
|
|
" <td>0.000549</td>\n",
|
|
" <td>0.998910</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>81</th>\n",
|
|
" <td>liste médias cp</td>\n",
|
|
" <td>0.000516</td>\n",
|
|
" <td>0.999069</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>67</th>\n",
|
|
" <td>SECRETAIRES FÉDÉRAUX 2021</td>\n",
|
|
" <td>0.000437</td>\n",
|
|
" <td>0.999203</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>68</th>\n",
|
|
" <td>Soirée solidarité Ukraine</td>\n",
|
|
" <td>0.000437</td>\n",
|
|
" <td>0.999338</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>89</th>\n",
|
|
" <td>tipi 2023</td>\n",
|
|
" <td>0.000412</td>\n",
|
|
" <td>0.999465</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>69</th>\n",
|
|
" <td>TIPI 2023</td>\n",
|
|
" <td>0.000412</td>\n",
|
|
" <td>0.999592</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>77</th>\n",
|
|
" <td>fhu22 - pass 3j shotgun</td>\n",
|
|
" <td>0.000353</td>\n",
|
|
" <td>0.999701</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>DEMANDES PRESSE 2022</td>\n",
|
|
" <td>0.000283</td>\n",
|
|
" <td>0.999788</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>76</th>\n",
|
|
" <td>demandes accréditation 2023</td>\n",
|
|
" <td>0.000254</td>\n",
|
|
" <td>0.999867</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>COIN DES MÔMES</td>\n",
|
|
" <td>0.000170</td>\n",
|
|
" <td>0.999919</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>82</th>\n",
|
|
" <td>orders_302493 (1)</td>\n",
|
|
" <td>0.000108</td>\n",
|
|
" <td>0.999953</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>37</th>\n",
|
|
" <td>Humacumba Shotgun FHU22</td>\n",
|
|
" <td>0.000108</td>\n",
|
|
" <td>0.999986</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>80</th>\n",
|
|
" <td>liste agences cp</td>\n",
|
|
" <td>0.000046</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" target_name customer_id \\\n",
|
|
"11 BDS 17/18/19 0.371483 \n",
|
|
"28 Cible non-acheteurs franciliens 0.208871 \n",
|
|
"34 FHU 2018 - Acheteurs bds 0.184817 \n",
|
|
"14 BDS FHU18 0.184505 \n",
|
|
"12 BDS 2021 0.183229 \n",
|
|
"79 importer_huma 0.183229 \n",
|
|
"15 BDS FHU21 0.183224 \n",
|
|
"5 BCOM 17/18/19 0.162247 \n",
|
|
"16 BDS FHU23 VDéf 0.152298 \n",
|
|
"13 BDS 2022 0.139095 \n",
|
|
"29 Cible non-acheteurs provinciaux 0.136991 \n",
|
|
"7 BCOM 2023 - PASS 3 JOURS 0.106946 \n",
|
|
"55 PROSPECT CONTACT -35ANS 0.103483 \n",
|
|
"73 billets bds au 11/09 0.077604 \n",
|
|
"64 Pass 3J 0.070337 \n",
|
|
"9 BCOM FHU18 0.064214 \n",
|
|
"6 BCOM 2018 0.059470 \n",
|
|
"72 bds 2022 +50 ans 0.052519 \n",
|
|
"10 BCOM FHU19 0.045593 \n",
|
|
"36 Festivaliers WEB 2020 - plein tarif 0.045410 \n",
|
|
"19 CAMPING 0.041178 \n",
|
|
"41 Inscriptions newsletters (depuis 2019) 0.039598 \n",
|
|
"56 PROSPECT FHU NORMANDIE 2021 #1 0.038925 \n",
|
|
"25 Camping FHU22 0.033716 \n",
|
|
"3 ACTIVATION BDS 24/08 0.031862 \n",
|
|
"75 data bcom 2023 - 15.06.23 0.026594 \n",
|
|
"20 CAMPING 2021 #1 0.022154 \n",
|
|
"21 CAMPING 24/08 0.022150 \n",
|
|
"58 PROSPECT FHU NORMANDIE 2021 #3 0.020579 \n",
|
|
"54 PROSPECT ACHETEURS FHU23 POUR FHU NORMANDIE 2023 0.017901 \n",
|
|
"85 prospect acheteurs fhu23_pour_fhu rouen 2023 0.017901 \n",
|
|
"63 PROSPECT NORMANDIE 0.014118 \n",
|
|
"60 PROSPECT FHU NORMANDIE 2022 BDS 0.013777 \n",
|
|
"87 prospect bds fhu normandie 0.013777 \n",
|
|
"44 PARKING 2021 #1 0.012484 \n",
|
|
"45 PARKING 24/08 0.012081 \n",
|
|
"42 PARKING - PORTE B 0.011848 \n",
|
|
"43 PARKING - PORTE J 0.011736 \n",
|
|
"86 prospect bcom fhu normandie 0.010709 \n",
|
|
"59 PROSPECT FHU NORMANDIE 2022 BCOM 0.010709 \n",
|
|
"50 PASS 3J - ENFANTS 0.009716 \n",
|
|
"49 PASS 3J - ADOS 0.008643 \n",
|
|
"18 BILLETS NUIT 0.007333 \n",
|
|
"51 PASS Culture 2023 au 11/09 0.007092 \n",
|
|
"84 pass culture 11/09 0.007092 \n",
|
|
"83 pass culture - pass 3j 0.007076 \n",
|
|
"4 Ados FHU22 0.006718 \n",
|
|
"33 Enfants FHU22 0.006631 \n",
|
|
"46 PARKING CAMPEURS 0.004781 \n",
|
|
"39 INSCRIPTION NL VOYAGES HUMA 0.003625 \n",
|
|
"17 BILLET CAMPING-CAR 0.002594 \n",
|
|
"61 PROSPECT FORUM LOGEMENT 2022 0.002062 \n",
|
|
"27 Camping-car FHU22 0.001996 \n",
|
|
"26 Camping Zen FHU22 0.001596 \n",
|
|
"66 RESP. STANDS POUR FHUA2020 0.001559 \n",
|
|
"52 PRESSE 2021 0.001559 \n",
|
|
"70 accreditations presse_fhu23 0.001434 \n",
|
|
"0 ACCREDITES - FHU 23 0.001434 \n",
|
|
"74 billets enfants 2023_06_05 0.001239 \n",
|
|
"2 ACHETEURS TIPIS 0.001023 \n",
|
|
"8 BCOM FHU NORMANDIE 2021 0.001023 \n",
|
|
"88 prospect forum logement 2023 0.001014 \n",
|
|
"62 PROSPECT FORUM LOGEMENT 2023 0.001014 \n",
|
|
"38 INSCRIPTION FORUM LOGEMENT - 14/02 0.000840 \n",
|
|
"57 PROSPECT FHU NORMANDIE 2021 #2 0.000802 \n",
|
|
"22 CN - 38e CONGRÈS / 2021 0.000790 \n",
|
|
"65 RESP. STANDS PCF 2021 0.000765 \n",
|
|
"40 INSCRITS VISIO 14/01/22 0.000698 \n",
|
|
"24 CROISIÉRISTES 2021 / RIVAGES DU MONDE 0.000686 \n",
|
|
"31 DINER DE PRESSE 2020 0.000669 \n",
|
|
"71 acheteurs bcom fhu normandie 2022 0.000657 \n",
|
|
"1 ACHETEURS BCOM FHU NORMANDIE 2022 0.000657 \n",
|
|
"47 PARKING PMR - PORTE B 0.000636 \n",
|
|
"53 PROSPECT / TOURISTRA 0.000628 \n",
|
|
"48 PARKING PMR - PORTE J 0.000590 \n",
|
|
"78 fichier presse - journalistes2 0.000578 \n",
|
|
"35 FHU22 - DIFFUSION CP 0.000570 \n",
|
|
"32 DINER DE PRESSE 2021 0.000549 \n",
|
|
"81 liste médias cp 0.000516 \n",
|
|
"67 SECRETAIRES FÉDÉRAUX 2021 0.000437 \n",
|
|
"68 Soirée solidarité Ukraine 0.000437 \n",
|
|
"89 tipi 2023 0.000412 \n",
|
|
"69 TIPI 2023 0.000412 \n",
|
|
"77 fhu22 - pass 3j shotgun 0.000353 \n",
|
|
"30 DEMANDES PRESSE 2022 0.000283 \n",
|
|
"76 demandes accréditation 2023 0.000254 \n",
|
|
"23 COIN DES MÔMES 0.000170 \n",
|
|
"82 orders_302493 (1) 0.000108 \n",
|
|
"37 Humacumba Shotgun FHU22 0.000108 \n",
|
|
"80 liste agences cp 0.000046 \n",
|
|
"\n",
|
|
" cumulative_customers \n",
|
|
"11 0.114611 \n",
|
|
"28 0.179052 \n",
|
|
"34 0.236071 \n",
|
|
"14 0.292995 \n",
|
|
"12 0.349525 \n",
|
|
"79 0.406055 \n",
|
|
"15 0.462584 \n",
|
|
"5 0.512640 \n",
|
|
"16 0.559627 \n",
|
|
"13 0.602541 \n",
|
|
"29 0.644806 \n",
|
|
"7 0.677801 \n",
|
|
"55 0.709728 \n",
|
|
"73 0.733670 \n",
|
|
"64 0.755371 \n",
|
|
"9 0.775182 \n",
|
|
"6 0.793530 \n",
|
|
"72 0.809733 \n",
|
|
"10 0.823800 \n",
|
|
"36 0.837810 \n",
|
|
"19 0.850514 \n",
|
|
"41 0.862731 \n",
|
|
"56 0.874740 \n",
|
|
"25 0.885142 \n",
|
|
"3 0.894972 \n",
|
|
"75 0.903177 \n",
|
|
"20 0.910012 \n",
|
|
"21 0.916846 \n",
|
|
"58 0.923195 \n",
|
|
"54 0.928717 \n",
|
|
"85 0.934240 \n",
|
|
"63 0.938596 \n",
|
|
"60 0.942847 \n",
|
|
"87 0.947097 \n",
|
|
"44 0.950949 \n",
|
|
"45 0.954676 \n",
|
|
"42 0.958332 \n",
|
|
"43 0.961953 \n",
|
|
"86 0.965257 \n",
|
|
"59 0.968561 \n",
|
|
"50 0.971558 \n",
|
|
"49 0.974225 \n",
|
|
"18 0.976487 \n",
|
|
"51 0.978675 \n",
|
|
"84 0.980863 \n",
|
|
"83 0.983046 \n",
|
|
"4 0.985119 \n",
|
|
"33 0.987165 \n",
|
|
"46 0.988640 \n",
|
|
"39 0.989758 \n",
|
|
"17 0.990559 \n",
|
|
"61 0.991195 \n",
|
|
"27 0.991811 \n",
|
|
"26 0.992303 \n",
|
|
"66 0.992784 \n",
|
|
"52 0.993265 \n",
|
|
"70 0.993707 \n",
|
|
"0 0.994150 \n",
|
|
"74 0.994532 \n",
|
|
"2 0.994848 \n",
|
|
"8 0.995163 \n",
|
|
"88 0.995476 \n",
|
|
"62 0.995789 \n",
|
|
"38 0.996048 \n",
|
|
"57 0.996296 \n",
|
|
"22 0.996540 \n",
|
|
"65 0.996776 \n",
|
|
"40 0.996991 \n",
|
|
"24 0.997203 \n",
|
|
"31 0.997409 \n",
|
|
"71 0.997612 \n",
|
|
"1 0.997814 \n",
|
|
"47 0.998011 \n",
|
|
"53 0.998204 \n",
|
|
"48 0.998386 \n",
|
|
"78 0.998565 \n",
|
|
"35 0.998740 \n",
|
|
"32 0.998910 \n",
|
|
"81 0.999069 \n",
|
|
"67 0.999203 \n",
|
|
"68 0.999338 \n",
|
|
"89 0.999465 \n",
|
|
"69 0.999592 \n",
|
|
"77 0.999701 \n",
|
|
"30 0.999788 \n",
|
|
"76 0.999867 \n",
|
|
"23 0.999919 \n",
|
|
"82 0.999953 \n",
|
|
"37 0.999986 \n",
|
|
"80 1.000000 "
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"print_main_target('14', 100)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "605cced5-052f-4a99-ac26-020c5d2ab633",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## KPI sur tags"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "916c3e2b-04d3-4877-b894-8f26f10d926e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"customersplus = load_dataset_2(\"4\", \"customersplus\")[['id', 'structure_id']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "46847b24-15a4-464e-969f-f16ed3653f1f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"structure_tag_mappings = load_dataset_2('4', \"structure_tag_mappings\")[['structure_id', 'tag_id']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3c10c69d-735f-453e-96bf-750697d965d0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"customersplus[customersplus['structure_id'].notna()]['structure_id'].nunique()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9b0e77b3-5f16-4484-9564-7d3826583418",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"len(customersplus[customersplus['structure_id'].notna()])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dfa27722-37f9-435a-8221-8aa6f9a4a107",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"structure_tag_mappings['structure_id'].nunique()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2daabdd5-31e3-4918-9856-9bbc30cde602",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def tags_information(tenant_id, first_tags):\n",
|
|
"    \"\"\"\n",
|
|
"    Print tag coverage statistics for one tenant and return its most frequent tags.\n",
|
|
"\n",
|
|
"    Parameters:\n",
|
|
"    - tenant_id: str\n",
|
|
"        Tenant directory, passed to load_dataset_2.\n",
|
|
"    - first_tags: int\n",
|
|
"        Number of tags kept in the returned ranking.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"    - DataFrame\n",
|
|
"        Columns tag_id, tag_name and customer_id (count of customer rows carrying the tag), sorted in descending order.\n",
|
|
"    \"\"\"\n",
|
|
"    customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']]\n",
|
|
"    customersplus = customersplus.rename(columns = {'id' : 'customer_id'})\n",
|
|
"    tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']]\n",
|
|
"    tags = tags.rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'})\n",
|
|
"    structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n",
|
|
"\n",
|
|
"    customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n",
|
|
"    # Only rows whose tag_id exists in the tags table survive this inner join\n",
|
|
"    customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n",
|
|
"\n",
|
|
"    nb_customers = len(customersplus)\n",
|
|
"    nb_customers_with_tag = customer_tags['customer_id'].nunique()\n",
|
|
"\n",
|
|
"    # A tenant without customers or without tagged customers would otherwise raise ZeroDivisionError\n",
|
|
"    prop_customers_with_tag = nb_customers_with_tag/nb_customers if nb_customers else float('nan')\n",
|
|
"    mean_tags_per_customer = len(customer_tags)/nb_customers_with_tag if nb_customers_with_tag else float('nan')\n",
|
|
"\n",
|
|
"    print('Nombre de client avec tag : ', nb_customers_with_tag)\n",
|
|
"    print('Proportion de clients avec tags : ', prop_customers_with_tag)\n",
|
|
"    print('Moyenne de tags par client : ', mean_tags_per_customer)\n",
|
|
"\n",
|
|
"    info = customer_tags.groupby(['tag_id', 'tag_name'])['customer_id'].count().reset_index().sort_values('customer_id', ascending = False).head(first_tags)\n",
|
|
"\n",
|
|
"    return info"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0b9f5f71-a927-4cc8-bb0c-9538e28d3553",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tags_information(\"1\", 20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bd5bef41-1774-4601-86b5-b7c1aea8f1d2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tags_information(\"2\", 20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7c2dc3e6-1418-44db-a8c0-4a9d59ec5232",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"load_dataset_2(\"2\", \"tags\")[['id', 'name']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c7b2c670-7122-4f67-b1aa-8c80a10f16d8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tags_information(\"3\", 20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "76639995-252d-4a58-83d8-c0c00900c3a9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tags_information(\"4\", 20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "07e91791-d4d4-42b1-ac18-22d3b0b9f7bd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tags_information(\"101\", 20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "87d131cd-ead0-4ef4-a8ee-b09022d08ffa",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## KPI product"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "26582be9-cfd1-48ea-a0a7-31101fdeb9d1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tenant_id = \"1\"\n",
|
|
"\n",
|
|
"df_product = display_databases(tenant_id, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
|
|
"\n",
|
|
"df_product.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "533bf499-dd56-4d29-b261-ca1e4928c9c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"nb_tickets_per_events = df_product.groupby(['name_event_types', 'name_events'])['ticket_id'].count().reset_index().sort_values('ticket_id', ascending = False)\n",
|
|
"nb_tickets_per_events['prop_tickets'] = round(nb_tickets_per_events['ticket_id']/len(df_product), 3)\n",
|
|
"nb_tickets_per_events"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"# Fin travail 25/02"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c437eaec",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"# Exemple sur Company 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a1c1fc39",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Chargement données"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "66f8c17b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"BUCKET = \"bdc2324-data/1\"\n",
|
|
"liste_database = fs.ls(BUCKET)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c08e6798",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n",
|
|
"\n",
|
|
"# Filtrer la liste pour les éléments contenant au moins un élément de la liste à tester\n",
|
|
"liste_database_filtered = [element for element in liste_database if any(element_part in element for element_part in liste_database_select)]\n",
|
|
"\n",
|
|
"# Afficher le résultat\n",
|
|
"print(liste_database_filtered)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "675f518d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# loop to create dataframes from liste\n",
|
|
"files_path = liste_database\n",
|
|
"\n",
|
|
"client_number = files_path[0].split(\"/\")[1]\n",
|
|
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
|
|
"\n",
|
|
"for i in range(len(files_path)) :\n",
|
|
" current_path = files_path[i]\n",
|
|
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
|
|
" df = pd.read_csv(file_in)\n",
|
|
" # the pattern of the name is df1xxx\n",
|
|
" nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
|
|
" globals()[nom_dataframe] = df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e855f403",
|
|
"metadata": {},
|
|
"source": [
|
|
"## customersplus.csv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "91a8f8c4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"a = pd.DataFrame(df1_customersplus.info())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2fda171d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def info_colonnes_dataframe(df):\n",
|
|
"    \"\"\"\n",
|
|
"    Summarize every column of a DataFrame: name, dtype and percentage of missing values.\n",
|
|
"\n",
|
|
"    Parameters:\n",
|
|
"    - df: DataFrame\n",
|
|
"        The DataFrame to describe.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"    - DataFrame\n",
|
|
"        One row per column of df, with the fields Nom_colonne, Type_colonne and Taux_NA (in percent).\n",
|
|
"    \"\"\"\n",
|
|
"    # One record per column, in the order given by df.items()\n",
|
|
"    resume_colonnes = [\n",
|
|
"        {\n",
|
|
"            'Nom_colonne': nom,\n",
|
|
"            'Type_colonne': str(valeurs.dtype),\n",
|
|
"            'Taux_NA': valeurs.isna().mean() * 100\n",
|
|
"        }\n",
|
|
"        for nom, valeurs in df.items()\n",
|
|
"    ]\n",
|
|
"\n",
|
|
"    return pd.DataFrame(resume_colonnes)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "205eeeab",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def cleaning_date(df, column_name):\n",
|
|
"    \"\"\"\n",
|
|
"    Convert one column of a DataFrame to UTC datetimes, parsing the values with the ISO8601 format.\n",
|
|
"\n",
|
|
"    The column is overwritten on the DataFrame that is passed in; the same object is returned.\n",
|
|
"\n",
|
|
"    Parameters:\n",
|
|
"    - df: DataFrame\n",
|
|
"        The DataFrame holding the column to clean.\n",
|
|
"    - column_name: str\n",
|
|
"        Name of the column to clean.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"    - DataFrame\n",
|
|
"        The input DataFrame, with the cleaned column.\n",
|
|
"    \"\"\"\n",
|
|
"    parsed_dates = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
|
|
"    df[column_name] = parsed_dates\n",
|
|
"    return df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "634282c5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"a = info_colonnes_dataframe(df1_customersplus)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0e8d4133",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"a"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1268ad5a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"a = pd.DataFrame(df1_customersplus.isna().sum()/len(df1_customersplus)*100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bd41dc80",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Selection des variables\n",
|
|
"df1_customersplus_clean = df1_customersplus.copy()\n",
|
|
"\n",
|
|
"cleaning_date(df1_customersplus_clean, 'first_buying_date')\n",
|
|
"cleaning_date(df1_customersplus_clean, 'last_visiting_date')\n",
|
|
"\n",
|
|
"df1_customersplus_clean.drop(['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'], axis = 1, inplace=True)\n",
|
|
"df1_customersplus_clean.rename(columns = {'id' : 'customer_id'}, inplace = True)\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "64d0f76b",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## tickets.csv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7e683711",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_tickets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e7b9a52e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_tickets.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "568280e8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_tickets.isna().sum()/len(df1_tickets)*100"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "29ecec90",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Variable selection\n",
|
|
"# drop() is called without inplace=True: with inplace=True it returns None, so the rename below failed on None\n",
|
|
"# NOTE(review): the dropped columns and the 'id' -> 'customer_id' rename look copied from the customersplus cell - confirm against the tickets schema\n",
|
|
"df1_tickets_clean = df1_tickets.drop(['lastname', 'firstname', 'email', 'created_at', 'updated_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode'], axis = 1)\n",
|
|
"df1_tickets_clean = df1_tickets_clean.rename(columns = {'id' : 'customer_id'})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "22bb5de4",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## suppliers.csv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6a9a91f4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_suppliers"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bab4758a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_suppliers.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b5fff251",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_suppliers.isna().sum()/len(df1_suppliers)*100"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8b09e2a3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Selection des variables\n",
|
|
"df1_suppliers_clean = df1_suppliers[['id', 'name']]\n",
|
|
"df1_suppliers_clean.rename(columns = {'name' : 'supplier_name'}, inplace = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ecee7cdc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_suppliers_clean"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c8e6e69b",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## type_ofs.csv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1a6cff1f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_type_ofs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "93630b41",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_type_ofs.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4f94481a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Selection des variables\n",
|
|
"df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']]\n",
|
|
"df1_type_ofs_clean.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1b2811e2",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## purchases.csv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2455d2e1",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_purchases"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5f9a159d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_purchases.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "db201bf7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Clean purchase_date with a single UTC conversion using the ISO8601 parser\n",
|
|
"# (a second to_datetime call on an already converted column does nothing)\n",
|
|
"df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], utc = True, format = 'ISO8601')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bd436fca",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_purchases.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "83435862",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Selection des variables\n",
|
|
"df1_purchases_clean = df1_purchases[['id', 'purchase_date', 'customer_id']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f210e730",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## Fusion de l'ensemble des données billétiques"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1f8b3aa7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fusion avec fournisseurs\n",
|
|
"df1_ticket_information = pd.merge(df1_tickets_clean, df1_suppliers_clean, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
|
|
"df1_ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
|
|
"\n",
|
|
"# Fusion avec type de tickets\n",
|
|
"df1_ticket_information = pd.merge(df1_ticket_information, df1_type_ofs_clean, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
|
|
"df1_ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
|
|
"\n",
|
|
"# Fusion avec achats\n",
|
|
"df1_ticket_information = pd.merge(df1_ticket_information, df1_purchases_clean, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
|
|
"df1_ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "83a4d021",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_ticket_information"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "56e6ebd1",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"# Utilisation de fonctions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "88fcde4b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Créer un DataFrame exemple\n",
|
|
"df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n",
|
|
"\n",
|
|
"# Appliquer la fonction pour nettoyer la colonne 'opened_at' de manière vectorisée\n",
|
|
"df_clean = cleaning_date(df_not_clean, 'opened_at')\n",
|
|
"df_clean.rename(columns = {'opened_at' : 'opened_at_clean'}, inplace = True)\n",
|
|
"\n",
|
|
"test = pd.concat([df1_campaign_stats[['opened_at']].head(20), df_clean], axis=1)\n",
|
|
"\n",
|
|
"test.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "818f69db",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Nettoyage, selection et fusion"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c9654eda",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_ticket_information"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7f2b620c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_ticket_information.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "637bdb72",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Customer information"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "14c52894",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Target area"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d83abfbf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Target.csv cleaning\n",
|
|
"df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]]\n",
|
|
"df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n",
|
|
"\n",
|
|
"# target_type cleaning\n",
|
|
"df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
|
|
"\n",
|
|
"#customer_target_mappings cleaning\n",
|
|
"df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
|
|
"\n",
|
|
"# Merge target et target_type\n",
|
|
"df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, left_on='target_type_id', right_on='target_type_id', how='inner')\n",
|
|
"df1_targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n",
|
|
"\n",
|
|
"# Merge\n",
|
|
"df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, left_on='target_id', right_on='target_id', how='inner')\n",
|
|
"df1_targets_full.drop(['target_id'], axis = 1, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "90d71b2c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n",
|
|
"len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n",
|
|
"\n",
|
|
"# 99,6% des 151 000 clients visés sont catégorisés plusieurs fois et en moyenne 5 fois... \n",
|
|
"df1_targets_test.mean()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2301de1e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_targets_full.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "75fbc2f7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Catégorisation des target_name\n",
|
|
"import pandas as pd\n",
|
|
"import nltk\n",
|
|
"from nltk.tokenize import word_tokenize\n",
|
|
"from nltk.corpus import stopwords\n",
|
|
"from nltk.stem import WordNetLemmatizer\n",
|
|
"from nltk.probability import FreqDist\n",
|
|
"\n",
|
|
"# Téléchargement des ressources nécessaires\n",
|
|
"nltk.download('punkt')\n",
|
|
"nltk.download('stopwords')\n",
|
|
"nltk.download('wordnet')\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "55cddf92",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Définition des fonctions de tokenisation, suppression des mots vides et lemmatisation\n",
|
|
"def preprocess_text(texte):\n",
|
|
"    \"\"\"\n",
|
|
"    Tokenize a text, remove French stopwords and lemmatize the remaining tokens.\n",
|
|
"\n",
|
|
"    Parameters:\n",
|
|
"    - texte: str or iterable of str\n",
|
|
"        A single text, or several text fragments that are joined with spaces before tokenization.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"    - list of str\n",
|
|
"        The lower-cased tokens after stopword removal and lemmatization.\n",
|
|
"    \"\"\"\n",
|
|
"    # A plain string is used as is: ' '.join() on a str would insert a space between every character\n",
|
|
"    if isinstance(texte, str):\n",
|
|
"        texte_concat = texte\n",
|
|
"    else:\n",
|
|
"        texte_concat = ' '.join(texte)\n",
|
|
"\n",
|
|
"    # Word tokenization\n",
|
|
"    tokens = word_tokenize(texte_concat.lower())\n",
|
|
"\n",
|
|
"    # Stopword removal\n",
|
|
"    stop_words = set(stopwords.words('french'))\n",
|
|
"    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
|
|
"\n",
|
|
"    # Lemmatization\n",
|
|
"    # NOTE(review): WordNetLemmatizer presumably only knows English lemmas, so French words may come back unchanged - confirm\n",
|
|
"    lemmatizer = WordNetLemmatizer()\n",
|
|
"    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
|
|
"\n",
|
|
"    return lemmatized_tokens\n",
|
|
"\n",
|
|
"\n",
|
|
"# Appliquer le prétraitement à la colonne de texte\n",
|
|
"df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n",
|
|
"\n",
|
|
"# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n",
|
|
"all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n",
|
|
"\n",
|
|
"# Calculer la fréquence des mots\n",
|
|
"freq_dist = FreqDist(all_words)\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7fd98a85",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Affichage des mots les plus fréquents\n",
|
|
"print(\"Mots les plus fréquents:\")\n",
|
|
"for mot, freq in freq_dist.most_common(15):\n",
|
|
" print(f\"{mot}: {freq}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cf94bb1d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import nltk\n",
|
|
"from nltk.tokenize import word_tokenize\n",
|
|
"from nltk.corpus import stopwords\n",
|
|
"from nltk.stem import WordNetLemmatizer\n",
|
|
"\n",
|
|
"# Téléchargement des ressources nécessaires\n",
|
|
"nltk.download('punkt')\n",
|
|
"nltk.download('stopwords')\n",
|
|
"nltk.download('wordnet')\n",
|
|
"\n",
|
|
"# Création de la DataFrame d'exemple\n",
|
|
"data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n",
|
|
"df = pd.DataFrame(data)\n",
|
|
"\n",
|
|
"# Fonction pour prétraiter le texte\n",
|
|
"def preprocess_text(texte):\n",
|
|
"    \"\"\"\n",
|
|
"    Tokenize a text, remove French stopwords and lemmatize the remaining tokens.\n",
|
|
"\n",
|
|
"    Parameters:\n",
|
|
"    - texte: str or iterable of str\n",
|
|
"        A single text, or several text fragments that are joined with spaces before tokenization.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"    - list of str\n",
|
|
"        The lower-cased tokens after stopword removal and lemmatization.\n",
|
|
"    \"\"\"\n",
|
|
"    # A plain string is used as is: ' '.join() on a str would insert a space between every character\n",
|
|
"    if isinstance(texte, str):\n",
|
|
"        texte_concat = texte\n",
|
|
"    else:\n",
|
|
"        texte_concat = ' '.join(texte)\n",
|
|
"\n",
|
|
"    # Word tokenization\n",
|
|
"    tokens = word_tokenize(texte_concat.lower())\n",
|
|
"\n",
|
|
"    # Stopword removal\n",
|
|
"    stop_words = set(stopwords.words('french'))\n",
|
|
"    filtered_tokens = [word for word in tokens if word not in stop_words]\n",
|
|
"\n",
|
|
"    # Lemmatization\n",
|
|
"    # NOTE(review): WordNetLemmatizer presumably only knows English lemmas, so French words may come back unchanged - confirm\n",
|
|
"    lemmatizer = WordNetLemmatizer()\n",
|
|
"    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n",
|
|
"\n",
|
|
"    return lemmatized_tokens\n",
|
|
"\n",
|
|
"# Appliquer la fonction de prétraitement à la colonne de texte\n",
|
|
"df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n",
|
|
"\n",
|
|
"# Afficher le résultat\n",
|
|
"print(df)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "711d3884",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## Campaign area"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c25b5295",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# campaign_stats cleaning \n",
|
|
"df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
|
|
"cleaning_date(df1_campaign_stats_clean, 'opened_at')\n",
|
|
"cleaning_date(df1_campaign_stats_clean, 'sent_at')\n",
|
|
"cleaning_date(df1_campaign_stats_clean, 'delivered_at')\n",
|
|
"\n",
|
|
"# campaigns cleaning\n",
|
|
"df1_campaigns_clean = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
|
|
"cleaning_date(df1_campaigns_clean, 'campaign_sent_at')\n",
|
|
"\n",
|
|
"# Merge \n",
|
|
"df1_campaigns_full = pd.merge(df1_campaign_stats_clean, df1_campaigns_clean, on = \"campaign_id\", how = \"left\")\n",
|
|
"df1_campaigns_full.drop(['campaign_id'], axis = 1, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2a3de6a5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_campaigns_full.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3fc1f446",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# NOTE(review): df1_campaigns_information is not created in this section; the merged campaign table built above is df1_campaigns_full\n",
|
|
"df1_campaigns_full"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "20e69ee3",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## Link area"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d9cbdbce",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_campaigns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c07459f0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_link_stats"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "80ae4c42",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true
|
|
},
|
|
"source": [
|
|
"## Supplier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b50b8f95",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Exploration helper for suppliers.csv: how often label, itr and commission are missing\n",
|
|
"def suppliers_exploration(suppliers = None):\n",
|
|
"    \"\"\"\n",
|
|
"    Build a one-row summary of a suppliers table.\n",
|
|
"\n",
|
|
"    Parameters\n",
|
|
"    ----------\n",
|
|
"    suppliers : pandas.DataFrame\n",
|
|
"        Table with at least the columns 'name', 'label', 'itr' and 'commission'.\n",
|
|
"\n",
|
|
"    Returns\n",
|
|
"    -------\n",
|
|
"    pandas.DataFrame\n",
|
|
"        One row with 'nb_suppliers' (number of distinct names) and 'label_na',\n",
|
|
"        'itr_na', 'commission_na' (percentage of missing values in each column).\n",
|
|
"\n",
|
|
"    Raises\n",
|
|
"    ------\n",
|
|
"    TypeError\n",
|
|
"        If `suppliers` is None; the default value is only a placeholder.\n",
|
|
"    \"\"\"\n",
|
|
"    if suppliers is None:\n",
|
|
"        raise TypeError(\"suppliers_exploration() requires a suppliers DataFrame, got None\")\n",
|
|
"\n",
|
|
"    # Mean of the boolean NaN mask = fraction of missing values; x100 gives a percentage\n",
|
|
"    na_rates = {column + '_na' : [suppliers[column].isna().mean() * 100]\n",
|
|
"                for column in ['label', 'itr', 'commission']}\n",
|
|
"\n",
|
|
"    suppliers_desc = pd.DataFrame({'nb_suppliers' : [suppliers['name'].nunique()], **na_rates})\n",
|
|
"\n",
|
|
"    return suppliers_desc"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7e292935",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_suppliers_desc = suppliers_exploration(suppliers = df1_suppliers)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "05b6f2b0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df1_suppliers_desc"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c9324d80",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Root bucket to explore\n",
|
|
"BUCKET = \"bdc2324-data\"\n",
|
|
"\n",
|
|
"# Top-level entries of the bucket (presumably one folder per company - verify)\n",
|
|
"liste_folders = fs.ls(BUCKET)\n",
|
|
"\n",
|
|
"# Flatten the per-folder listings into a single list of paths\n",
|
|
"liste_files = []\n",
|
|
"for company_folder in liste_folders:\n",
|
|
"    liste_files += fs.ls(company_folder)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "10304058",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Substrings identifying the files to keep\n",
|
|
"liste_database_select = ['suppliers']\n",
|
|
"\n",
|
|
"# Keep every path that contains at least one of the selected substrings\n",
|
|
"liste_suppliers = [\n",
|
|
"    element\n",
|
|
"    for element in liste_files\n",
|
|
"    if any(element_part in element for element_part in liste_database_select)\n",
|
|
"]\n",
|
|
"\n",
|
|
"# Show the result\n",
|
|
"print(liste_suppliers)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ffa423e5",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load one CSV file from S3 together with the client number found in its path\n",
|
|
"def database_loading(database_name = None):\n",
|
|
"    \"\"\"\n",
|
|
"    Read a CSV object through the module-level `fs` filesystem.\n",
|
|
"\n",
|
|
"    Parameters\n",
|
|
"    ----------\n",
|
|
"    database_name : str\n",
|
|
"        Path of the object to open. Its second \"/\"-separated segment is taken as the\n",
|
|
"        client number (presumably \"<bucket>/<client>/<file>.csv\" - verify against callers).\n",
|
|
"\n",
|
|
"    Returns\n",
|
|
"    -------\n",
|
|
"    tuple\n",
|
|
"        (df, client_number): the DataFrame parsed by pd.read_csv and the client\n",
|
|
"        number as a string.\n",
|
|
"    \"\"\"\n",
|
|
"    # Second path segment, e.g. \"a/b/c.csv\" -> \"b\"\n",
|
|
"    client_number = database_name.split(\"/\")[1]\n",
|
|
"\n",
|
|
"    with fs.open(database_name, mode=\"rb\") as file_in:\n",
|
|
"        df = pd.read_csv(file_in)\n",
|
|
"\n",
|
|
"    return df, client_number"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "70bdc88d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6a0f567d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Collect one frame per suppliers file and concatenate once at the end: calling\n",
|
|
"# pd.concat inside the loop re-copies every accumulated row at each iteration.\n",
|
|
"liste_df_suppliers = []\n",
|
|
"\n",
|
|
"for link in liste_suppliers:\n",
|
|
"    df_supplier, tenant_id = database_loading(link)\n",
|
|
"\n",
|
|
"    # Tag each row with the client number returned alongside the frame\n",
|
|
"    df_supplier['tenant_id'] = int(tenant_id)\n",
|
|
"\n",
|
|
"    liste_df_suppliers.append(df_supplier)\n",
|
|
"\n",
|
|
"# pd.concat raises on an empty list, so fall back to an empty frame when no file matched\n",
|
|
"df_all = pd.concat(liste_df_suppliers, axis = 0) if liste_df_suppliers else pd.DataFrame()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1522d8cd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# df_all[df_all['tenant_id'] == 101]['name'].unique()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b0e42a61",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Keywords looked for in supplier names (vad = \"vente à distance\", i.e. distance selling)\n",
|
|
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online']\n",
|
|
"\n",
|
|
"# Missing names become empty strings so that the text search below returns booleans only\n",
|
|
"df_all['name'] = df_all['name'].fillna('')\n",
|
|
"\n",
|
|
"# 1 when the name contains any keyword (case-insensitive regex alternation), else 0\n",
|
|
"motif_internet = '|'.join(liste_mots)\n",
|
|
"df_all['canal_vente_internet'] = df_all['name'].str.contains(motif_internet, case=False).astype(int)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d299ae91",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_all.groupby('tenant_id')['canal_vente_internet'].max()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|