BDC-team-1/Exploration_billet_AJ.ipynb

4820 lines
163 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "5bf5c226",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b1a5b9d3",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"import warnings"
]
},
{
"cell_type": "markdown",
"id": "ecfa2219",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1a094277",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
]
},
{
"cell_type": "markdown",
"id": "0294ce71-840e-458b-8ffa-cadabbc6da21",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Debut Travail 25/02"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "30d77451-2df6-4c07-8b15-66e0e990ff03",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"\n",
"# Import cleaning and merge functions\n",
"exec(open('0_KPI_functions.py').read())\n",
"\n",
"# Ignore warning\n",
"warnings.filterwarnings('ignore')\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39",
"metadata": {},
"outputs": [],
"source": [
"def load_dataset_2(directory_path, file_name):\n",
" \"\"\"\n",
" This function loads csv file\n",
" \"\"\"\n",
" file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n",
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\")\n",
"\n",
" # drop na :\n",
" #df = df.dropna(axis=1, thresh=len(df))\n",
" # if identifier in table : delete it\n",
" if 'identifier' in df.columns:\n",
" df = df.drop(columns = 'identifier')\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "35da2e15-1e23-4653-a214-c6ff8f186e85",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/customerplus_cleaned.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>479734</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>184801.0</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1537</td>\n",
" <td>1352</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>504615</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>152176.0</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3832780</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3096540</td>\n",
" <td>3587</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320804</th>\n",
" <td>2637745</td>\n",
" <td>406842</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0.000000</td>\n",
" <td>2.0</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-08 20:30:11+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320805</th>\n",
" <td>23334</td>\n",
" <td>22677</td>\n",
" <td>NaN</td>\n",
" <td>185203.0</td>\n",
" <td>4</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>13.0</td>\n",
" <td>13</td>\n",
" <td>11.692308</td>\n",
" <td>0.0</td>\n",
" <td>25.333333</td>\n",
" <td>2.166667</td>\n",
" <td>152.0</td>\n",
" <td>6</td>\n",
" <td>2018-05-02 07:47:40+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320806</th>\n",
" <td>2641373</td>\n",
" <td>408068</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>12.0</td>\n",
" <td>4</td>\n",
" <td>12.000000</td>\n",
" <td>0.0</td>\n",
" <td>48.000000</td>\n",
" <td>4.000000</td>\n",
" <td>48.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-09 11:46:23+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320807</th>\n",
" <td>2641469</td>\n",
" <td>408160</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>12.000000</td>\n",
" <td>0.0</td>\n",
" <td>12.000000</td>\n",
" <td>1.000000</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-09 18:50:55+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320808</th>\n",
" <td>2641474</td>\n",
" <td>408165</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1342</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>12.000000</td>\n",
" <td>0.0</td>\n",
" <td>12.000000</td>\n",
" <td>1.000000</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>2021-12-09 19:02:42+00:00</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>320809 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 479734 3587 NaN 184801.0 0 \n",
"1 1537 1352 NaN NaN 0 \n",
"2 504615 3587 NaN 152176.0 0 \n",
"3 3832780 3587 NaN NaN 0 \n",
"4 3096540 3587 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"320804 2637745 406842 NaN NaN 1 \n",
"320805 23334 22677 NaN 185203.0 4 \n",
"320806 2641373 408068 NaN NaN 1 \n",
"320807 2641469 408160 NaN NaN 1 \n",
"320808 2641474 408165 NaN NaN 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 1342 False NaN 0 True ... \n",
"1 1342 False NaN 0 True ... \n",
"2 1342 False NaN 0 True ... \n",
"3 1342 False NaN 2 True ... \n",
"4 1342 False NaN 2 True ... \n",
"... ... ... ... ... ... ... \n",
"320804 1342 False NaN 0 True ... \n",
"320805 1342 False NaN 0 True ... \n",
"320806 1342 False NaN 0 True ... \n",
"320807 1342 False NaN 0 True ... \n",
"320808 1342 False NaN 0 True ... \n",
"\n",
" max_price ticket_sum average_price average_purchase_delay \\\n",
"0 NaN 0 NaN NaN \n",
"1 NaN 0 NaN NaN \n",
"2 NaN 0 NaN NaN \n",
"3 NaN 0 NaN NaN \n",
"4 NaN 0 NaN NaN \n",
"... ... ... ... ... \n",
"320804 0.0 2 0.000000 2.0 \n",
"320805 13.0 13 11.692308 0.0 \n",
"320806 12.0 4 12.000000 0.0 \n",
"320807 12.0 1 12.000000 0.0 \n",
"320808 12.0 1 12.000000 0.0 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"0 NaN NaN 0.0 \n",
"1 NaN NaN 0.0 \n",
"2 NaN NaN 0.0 \n",
"3 NaN NaN 0.0 \n",
"4 NaN NaN 0.0 \n",
"... ... ... ... \n",
"320804 0.000000 2.000000 0.0 \n",
"320805 25.333333 2.166667 152.0 \n",
"320806 48.000000 4.000000 48.0 \n",
"320807 12.000000 1.000000 12.0 \n",
"320808 12.000000 1.000000 12.0 \n",
"\n",
" purchase_count first_buying_date country \n",
"0 0 NaN fr \n",
"1 0 NaN fr \n",
"2 0 NaN fr \n",
"3 0 NaN fr \n",
"4 0 NaN fr \n",
"... ... ... ... \n",
"320804 1 2021-12-08 20:30:11+00:00 fr \n",
"320805 6 2018-05-02 07:47:40+00:00 fr \n",
"320806 1 2021-12-09 11:46:23+00:00 fr \n",
"320807 1 2021-12-09 18:50:55+00:00 fr \n",
"320808 1 2021-12-09 19:02:42+00:00 fr \n",
"\n",
"[320809 rows x 22 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"display_databases(\"4\", \"customerplus_cleaned\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "6c8ad8c3-25df-4fe4-9ad0-ee5f9498bc14",
"metadata": {},
"outputs": [],
"source": [
"pd.reset_option('display.max_rows')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c897916c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>code</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>101</td>\n",
" <td>hongrie</td>\n",
" <td>hu</td>\n",
" <td>2023-06-13 11:17:40.600622+02:00</td>\n",
" <td>2023-06-13 11:17:40.600622+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>albanie</td>\n",
" <td>al</td>\n",
" <td>2023-06-13 11:17:40.540652+02:00</td>\n",
" <td>2023-06-13 11:17:40.540652+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>antarctique</td>\n",
" <td>aq</td>\n",
" <td>2023-06-13 11:17:40.541315+02:00</td>\n",
" <td>2023-06-13 11:17:40.541315+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12</td>\n",
" <td>autriche</td>\n",
" <td>at</td>\n",
" <td>2023-06-13 11:17:40.546711+02:00</td>\n",
" <td>2023-06-13 11:17:40.546711+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>samoa américaines</td>\n",
" <td>as</td>\n",
" <td>2023-06-13 11:17:40.542569+02:00</td>\n",
" <td>2023-06-13 11:17:40.542569+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>228</td>\n",
" <td>royaume-uni</td>\n",
" <td>gb</td>\n",
" <td>2023-06-13 11:17:40.678023+02:00</td>\n",
" <td>2023-06-13 11:17:40.678023+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>239</th>\n",
" <td>25</td>\n",
" <td>brésil</td>\n",
" <td>br</td>\n",
" <td>2023-06-13 11:17:40.554209+02:00</td>\n",
" <td>2023-06-13 11:17:40.554209+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>10</td>\n",
" <td>argentine</td>\n",
" <td>ar</td>\n",
" <td>2023-06-13 11:17:40.545489+02:00</td>\n",
" <td>2023-06-13 11:17:40.545489+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>241</th>\n",
" <td>203</td>\n",
" <td>espagne</td>\n",
" <td>es</td>\n",
" <td>2023-06-13 11:17:40.662472+02:00</td>\n",
" <td>2023-06-13 11:17:40.662472+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>242</th>\n",
" <td>192</td>\n",
" <td>arabie saoudite</td>\n",
" <td>sa</td>\n",
" <td>2023-06-13 11:17:40.656154+02:00</td>\n",
" <td>2023-06-13 11:17:40.656154+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>243 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id name code created_at \\\n",
"0 101 hongrie hu 2023-06-13 11:17:40.600622+02:00 \n",
"1 2 albanie al 2023-06-13 11:17:40.540652+02:00 \n",
"2 3 antarctique aq 2023-06-13 11:17:40.541315+02:00 \n",
"3 12 autriche at 2023-06-13 11:17:40.546711+02:00 \n",
"4 5 samoa américaines as 2023-06-13 11:17:40.542569+02:00 \n",
".. ... ... ... ... \n",
"238 228 royaume-uni gb 2023-06-13 11:17:40.678023+02:00 \n",
"239 25 brésil br 2023-06-13 11:17:40.554209+02:00 \n",
"240 10 argentine ar 2023-06-13 11:17:40.545489+02:00 \n",
"241 203 espagne es 2023-06-13 11:17:40.662472+02:00 \n",
"242 192 arabie saoudite sa 2023-06-13 11:17:40.656154+02:00 \n",
"\n",
" updated_at \n",
"0 2023-06-13 11:17:40.600622+02:00 \n",
"1 2023-06-13 11:17:40.540652+02:00 \n",
"2 2023-06-13 11:17:40.541315+02:00 \n",
"3 2023-06-13 11:17:40.546711+02:00 \n",
"4 2023-06-13 11:17:40.542569+02:00 \n",
".. ... \n",
"238 2023-06-13 11:17:40.678023+02:00 \n",
"239 2023-06-13 11:17:40.554209+02:00 \n",
"240 2023-06-13 11:17:40.545489+02:00 \n",
"241 2023-06-13 11:17:40.662472+02:00 \n",
"242 2023-06-13 11:17:40.656154+02:00 \n",
"\n",
"[243 rows x 5 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"load_dataset_2(\"7\", \"countries\")"
]
},
{
"cell_type": "markdown",
"id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Base communes au types Musée"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384",
"metadata": {},
"outputs": [],
"source": [
"companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
" 'sport': ['5', '6', '7', '8', '9'],\n",
" 'musique' : ['10', '11', '12', '13', '14']}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbce1124-9a22-4502-a47a-fc3d0e2db70b",
"metadata": {},
"outputs": [],
"source": [
"companies['musee']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5080f66e-f779-410a-876d-b4fe2795e17e",
"metadata": {},
"outputs": [],
"source": [
"for i in companies['musique']:\n",
" BUCKET = \"bdc2324-data/\"+i\n",
" liste_base = []\n",
" for base in fs.ls(BUCKET):\n",
" match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
" if match:\n",
" nom_base = match.group(3)\n",
" liste_base.append(nom_base)\n",
" globals()['base_'+i] = liste_base\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abd477e1-7479-4c88-a5aa-f987af3f5b79",
"metadata": {},
"outputs": [],
"source": [
"# Trouver l'intersection entre les cinq listes\n",
"intersection = set(base_1).intersection(base_2, base_3, base_4, base_101)\n",
"\n",
"# Convertir le résultat en liste si nécessaire\n",
"intersection_liste = list(intersection)\n",
"\n",
"print(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e",
"metadata": {},
"outputs": [],
"source": [
"# Trouver l'intersection entre les cinq listes\n",
"intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n",
"\n",
"# Convertir le résultat en liste si nécessaire\n",
"intersection_liste = list(intersection)\n",
"\n",
"print(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a",
"metadata": {},
"outputs": [],
"source": [
"len(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d058b21-a538-4f59-aefb-ef7966f73fdc",
"metadata": {},
"outputs": [],
"source": [
"df1_tags = load_dataset_2(\"1\", \"tags\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa441f99-733c-4675-8676-bed4682d3324",
"metadata": {},
"outputs": [],
"source": [
"df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6767a750-14a4-4c05-903e-d2f07170825b",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus = load_dataset_2(\"1\", \"customersplus\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "125e9145-a815-46fd-bdf4-07589508b259",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c17a6976-792f-474d-bcff-c89396eddb3f",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecfc155a-cb42-46ec-8da5-33fdcd087355",
"metadata": {},
"outputs": [],
"source": [
"len(df1_structure_tag_mappings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "071410b8-950d-4fcc-b2b9-57415253c286",
"metadata": {},
"outputs": [],
"source": [
"df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc",
"metadata": {},
"outputs": [],
"source": [
"?np.sort_values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3",
"metadata": {},
"outputs": [],
"source": [
"def info_colonnes_dataframe(df):\n",
" # Créer une liste pour stocker les informations sur chaque colonne\n",
" infos_colonnes = []\n",
"\n",
" # Parcourir les colonnes du DataFrame\n",
" for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n",
" # Calculer le taux de valeurs manquantes\n",
" taux_na = serie.isna().mean() * 100\n",
"\n",
" # Ajouter les informations à la liste\n",
" infos_colonnes.append({\n",
" 'Nom_colonne': nom_colonne,\n",
" 'Type_colonne': str(serie.dtype),\n",
" 'Taux_NA': taux_na\n",
" })\n",
"\n",
" # Créer une nouvelle DataFrame à partir de la liste d'informations\n",
" df_infos_colonnes = pd.DataFrame(infos_colonnes)\n",
"\n",
" return df_infos_colonnes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"info_colonnes_dataframe(df1_tags)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980",
"metadata": {},
"outputs": [],
"source": [
"info_colonnes_dataframe(df1_structure_tag_mappings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_colwidth', None)\n",
"\n",
"print(df1_tags['name'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a59bf932-5b54-4600-81f5-c55ac93ae510",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print(df1_tags['name'])"
]
},
{
"cell_type": "markdown",
"id": "76bffba1-5f7e-4308-9224-437ca66148f8",
"metadata": {},
"source": [
"## KPI sur target_type"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "d91d5895",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_colwidth', None)\n"
]
},
{
"cell_type": "markdown",
"id": "c58b17d3",
"metadata": {},
"source": [
"Raisonnement : on prends les target_type qui représente 90% des clients et on fait des catégories dessus."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d74426b3",
"metadata": {},
"outputs": [],
"source": [
"targets = load_dataset_2(\"3\", \"targets\")\n",
"target_types = load_dataset_2(\"3\", \"target_types\")\n",
"\n",
"# target_all = pd.merge(targets, target_types, left_on= 'target_type_id', right_on= 'id' ,how = 'inner')\n"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "6930bff5",
"metadata": {},
"outputs": [],
"source": [
"def print_main_target(tenant_id, nb_print = 40):\n",
" df_target = display_databases(tenant_id, \"target_information\")\n",
"\n",
" print('Nombre de ciblage : ', len(df_target))\n",
" nb_customers = df_target['customer_id'].nunique()\n",
" print('Nombre de client avec étiquette target : ', nb_customers) \n",
"\n",
" nb_custumers_per_target = df_target.groupby(\"target_name\")['customer_id'].count().reset_index().sort_values('customer_id', ascending=False)\n",
" nb_custumers_per_target['cumulative_customers'] = nb_custumers_per_target['customer_id'].cumsum()/len(df_target)\n",
" nb_custumers_per_target['customer_id'] = nb_custumers_per_target['customer_id']/nb_customers\n",
"\n",
" return nb_custumers_per_target.head(nb_print)"
]
},
{
"cell_type": "markdown",
"id": "5df6bccd",
"metadata": {},
"source": [
"### Entreprise 1"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "1e7ee1a0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de ciblage : 768024\n",
"Nombre de client avec étiquette target : 151159\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>161</th>\n",
" <td>consentement optin mediation specialisee</td>\n",
" <td>0.992333</td>\n",
" <td>0.195306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>consentement optin jeune public</td>\n",
" <td>0.992194</td>\n",
" <td>0.390585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>consentement optin b2c</td>\n",
" <td>0.720493</td>\n",
" <td>0.532390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Arenametrix_bascule tel vers sib</td>\n",
" <td>0.232973</td>\n",
" <td>0.578242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>consentement optout b2c</td>\n",
" <td>0.228389</td>\n",
" <td>0.623193</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>COM Inscrits NL générale (historique)</td>\n",
" <td>0.152191</td>\n",
" <td>0.653146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>consentement optin newsletter generale</td>\n",
" <td>0.146171</td>\n",
" <td>0.681915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169</th>\n",
" <td>consentement optout newsletter generale</td>\n",
" <td>0.124736</td>\n",
" <td>0.706465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>170</th>\n",
" <td>consentement optout scolaires</td>\n",
" <td>0.104155</td>\n",
" <td>0.726964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>consentement optout dre</td>\n",
" <td>0.094788</td>\n",
" <td>0.745620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>164</th>\n",
" <td>consentement optout b2b</td>\n",
" <td>0.094067</td>\n",
" <td>0.764134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>Inscrits NL générale (export_291019 + operation_videomaton)</td>\n",
" <td>0.093187</td>\n",
" <td>0.782474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>157</th>\n",
" <td>consentement optin b2b</td>\n",
" <td>0.084249</td>\n",
" <td>0.799056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>216</th>\n",
" <td>ddcp_visiteurs dps 010622</td>\n",
" <td>0.081735</td>\n",
" <td>0.815142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Contacts_prenomsdoubles</td>\n",
" <td>0.077025</td>\n",
" <td>0.830302</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>FORMATION _ acheteurs optin last year</td>\n",
" <td>0.069364</td>\n",
" <td>0.843954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>214</th>\n",
" <td>ddcp_promo_visiteurs occasionnels_musee_8mois</td>\n",
" <td>0.043927</td>\n",
" <td>0.852600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>189</th>\n",
" <td>ddcp_promo_md_musée_dps 011019</td>\n",
" <td>0.039759</td>\n",
" <td>0.860425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>188</th>\n",
" <td>ddcp_promo_MD_billet_musée_oct_2019_agarder2</td>\n",
" <td>0.036266</td>\n",
" <td>0.867563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163</th>\n",
" <td>consentement optin scolaires</td>\n",
" <td>0.032079</td>\n",
" <td>0.873876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <td>consentement optin dre</td>\n",
" <td>0.029949</td>\n",
" <td>0.879771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>DDCP Newsletter enseignants</td>\n",
" <td>0.029836</td>\n",
" <td>0.885643</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>DDCP Newsletter jeune public</td>\n",
" <td>0.025549</td>\n",
" <td>0.890671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>Inscrits NL générale site web</td>\n",
" <td>0.024689</td>\n",
" <td>0.895531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>Votre première liste</td>\n",
" <td>0.024577</td>\n",
" <td>0.900368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>DDCP billets famille</td>\n",
" <td>0.023876</td>\n",
" <td>0.905067</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>DRE MucemLab</td>\n",
" <td>0.015229</td>\n",
" <td>0.908064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>DDCP Newsletter relais champ social</td>\n",
" <td>0.015017</td>\n",
" <td>0.911020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>DRE institutionnels</td>\n",
" <td>0.014746</td>\n",
" <td>0.913922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>DDCP PROMO Participants ateliers (adultes et enfants)</td>\n",
" <td>0.012927</td>\n",
" <td>0.916466</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>DDCP promo Plan B 2019 (concerts)</td>\n",
" <td>0.012887</td>\n",
" <td>0.919003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>DDCP promo MD pass musées dps oct 2018</td>\n",
" <td>0.011809</td>\n",
" <td>0.921327</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>DDCP rentrée culturelle 2023</td>\n",
" <td>0.011624</td>\n",
" <td>0.923614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>DDCP MD Procès du Siècle</td>\n",
" <td>0.011141</td>\n",
" <td>0.925807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>186</th>\n",
" <td>ddcp_md_scene_ouverte_au_talent</td>\n",
" <td>0.010433</td>\n",
" <td>0.927860</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108</th>\n",
" <td>DRE chercheurs</td>\n",
" <td>0.010300</td>\n",
" <td>0.929888</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220</th>\n",
" <td>festival_jean_rouch</td>\n",
" <td>0.009937</td>\n",
" <td>0.931843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>105</th>\n",
" <td>DRE Festival Jean Rouch</td>\n",
" <td>0.009937</td>\n",
" <td>0.933799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>275</th>\n",
" <td>structures_etiquette champ social</td>\n",
" <td>0.009844</td>\n",
" <td>0.935736</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)</td>\n",
" <td>0.008554</td>\n",
" <td>0.937420</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_name \\\n",
"161 consentement optin mediation specialisee \n",
"160 consentement optin jeune public \n",
"158 consentement optin b2c \n",
"5 Arenametrix_bascule tel vers sib \n",
"165 consentement optout b2c \n",
"19 COM Inscrits NL générale (historique) \n",
"162 consentement optin newsletter generale \n",
"169 consentement optout newsletter generale \n",
"170 consentement optout scolaires \n",
"166 consentement optout dre \n",
"164 consentement optout b2b \n",
"126 Inscrits NL générale (export_291019 + operation_videomaton) \n",
"157 consentement optin b2b \n",
"216 ddcp_visiteurs dps 010622 \n",
"20 Contacts_prenomsdoubles \n",
"115 FORMATION _ acheteurs optin last year \n",
"214 ddcp_promo_visiteurs occasionnels_musee_8mois \n",
"189 ddcp_promo_md_musée_dps 011019 \n",
"188 ddcp_promo_MD_billet_musée_oct_2019_agarder2 \n",
"163 consentement optin scolaires \n",
"159 consentement optin dre \n",
"34 DDCP Newsletter enseignants \n",
"36 DDCP Newsletter jeune public \n",
"127 Inscrits NL générale site web \n",
"145 Votre première liste \n",
"61 DDCP billets famille \n",
"106 DRE MucemLab \n",
"39 DDCP Newsletter relais champ social \n",
"110 DRE institutionnels \n",
"48 DDCP PROMO Participants ateliers (adultes et enfants) \n",
"74 DDCP promo Plan B 2019 (concerts) \n",
"72 DDCP promo MD pass musées dps oct 2018 \n",
"94 DDCP rentrée culturelle 2023 \n",
"23 DDCP MD Procès du Siècle \n",
"186 ddcp_md_scene_ouverte_au_talent \n",
"108 DRE chercheurs \n",
"220 festival_jean_rouch \n",
"105 DRE Festival Jean Rouch \n",
"275 structures_etiquette champ social \n",
"86 DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers) \n",
"\n",
" customer_id cumulative_customers \n",
"161 0.992333 0.195306 \n",
"160 0.992194 0.390585 \n",
"158 0.720493 0.532390 \n",
"5 0.232973 0.578242 \n",
"165 0.228389 0.623193 \n",
"19 0.152191 0.653146 \n",
"162 0.146171 0.681915 \n",
"169 0.124736 0.706465 \n",
"170 0.104155 0.726964 \n",
"166 0.094788 0.745620 \n",
"164 0.094067 0.764134 \n",
"126 0.093187 0.782474 \n",
"157 0.084249 0.799056 \n",
"216 0.081735 0.815142 \n",
"20 0.077025 0.830302 \n",
"115 0.069364 0.843954 \n",
"214 0.043927 0.852600 \n",
"189 0.039759 0.860425 \n",
"188 0.036266 0.867563 \n",
"163 0.032079 0.873876 \n",
"159 0.029949 0.879771 \n",
"34 0.029836 0.885643 \n",
"36 0.025549 0.890671 \n",
"127 0.024689 0.895531 \n",
"145 0.024577 0.900368 \n",
"61 0.023876 0.905067 \n",
"106 0.015229 0.908064 \n",
"39 0.015017 0.911020 \n",
"110 0.014746 0.913922 \n",
"48 0.012927 0.916466 \n",
"74 0.012887 0.919003 \n",
"72 0.011809 0.921327 \n",
"94 0.011624 0.923614 \n",
"23 0.011141 0.925807 \n",
"186 0.010433 0.927860 \n",
"108 0.010300 0.929888 \n",
"220 0.009937 0.931843 \n",
"105 0.009937 0.933799 \n",
"275 0.009844 0.935736 \n",
"86 0.008554 0.937420 "
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Company '1': show its target summary table (target_name, customer_id, cumulative_customers).\n",
"# The optional second argument is omitted here, so the function's own default applies\n",
"# (presumably a cap on the number of rows shown - TODO confirm against print_main_target).\n",
"print_main_target('1')"
]
},
{
"cell_type": "markdown",
"id": "31941e25",
"metadata": {},
"source": [
"### Entreprise 2"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "b57a28ac",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_2/target_information.csv\n",
"Nombre de ciblage : 260283\n",
"Nombre de client avec étiquette target : 233320\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Schokoladentour &amp; Führungen Individuals</td>\n",
" <td>0.927906</td>\n",
" <td>0.831783</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Chocolateria Kurse 2023</td>\n",
" <td>0.073963</td>\n",
" <td>0.898084</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>mailxpert_contacts_2023-07-18_12-04-00 langue</td>\n",
" <td>0.025519</td>\n",
" <td>0.920959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>mailxpert_contacts_2023-07-18_12-04-00</td>\n",
" <td>0.025519</td>\n",
" <td>0.943834</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Newsletter opt-in Allgemein</td>\n",
" <td>0.022836</td>\n",
" <td>0.964304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Schokoladentour &amp; Führungen Gruppen / Schulen</td>\n",
" <td>0.011555</td>\n",
" <td>0.974662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Newsletter DE</td>\n",
" <td>0.010749</td>\n",
" <td>0.984298</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Newsletter FR</td>\n",
" <td>0.008520</td>\n",
" <td>0.991936</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Newsletter EN</td>\n",
" <td>0.004286</td>\n",
" <td>0.995778</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Frauen in Zürich - Schulung</td>\n",
" <td>0.001003</td>\n",
" <td>0.996677</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>mailxpert_contacts_2023-07-18_13-25-45_inaktiv</td>\n",
" <td>0.000471</td>\n",
" <td>0.997099</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Opt-in-Website DE</td>\n",
" <td>0.000030</td>\n",
" <td>0.997126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Opt-in Website EN</td>\n",
" <td>0.000009</td>\n",
" <td>0.997134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Opt-in Website FR</td>\n",
" <td>0.000004</td>\n",
" <td>0.997138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Votre première liste</td>\n",
" <td>0.000004</td>\n",
" <td>0.997142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Activated contact EN</td>\n",
" <td>0.000004</td>\n",
" <td>0.997145</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Activated contact FR</td>\n",
" <td>0.000004</td>\n",
" <td>0.997149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Activated contact DE</td>\n",
" <td>0.000004</td>\n",
" <td>0.997153</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_name customer_id \\\n",
"13 Schokoladentour & Führungen Individuals 0.927906 \n",
"3 Chocolateria Kurse 2023 0.073963 \n",
"16 mailxpert_contacts_2023-07-18_12-04-00 langue 0.025519 \n",
"15 mailxpert_contacts_2023-07-18_12-04-00 0.025519 \n",
"8 Newsletter opt-in Allgemein 0.022836 \n",
"12 Schokoladentour & Führungen Gruppen / Schulen 0.011555 \n",
"5 Newsletter DE 0.010749 \n",
"7 Newsletter FR 0.008520 \n",
"6 Newsletter EN 0.004286 \n",
"4 Frauen in Zürich - Schulung 0.001003 \n",
"17 mailxpert_contacts_2023-07-18_13-25-45_inaktiv 0.000471 \n",
"11 Opt-in-Website DE 0.000030 \n",
"9 Opt-in Website EN 0.000009 \n",
"10 Opt-in Website FR 0.000004 \n",
"14 Votre première liste 0.000004 \n",
"1 Activated contact EN 0.000004 \n",
"2 Activated contact FR 0.000004 \n",
"0 Activated contact DE 0.000004 \n",
"\n",
" cumulative_customers \n",
"13 0.831783 \n",
"3 0.898084 \n",
"16 0.920959 \n",
"15 0.943834 \n",
"8 0.964304 \n",
"12 0.974662 \n",
"5 0.984298 \n",
"7 0.991936 \n",
"6 0.995778 \n",
"4 0.996677 \n",
"17 0.997099 \n",
"11 0.997126 \n",
"9 0.997134 \n",
"10 0.997138 \n",
"14 0.997142 \n",
"1 0.997145 \n",
"2 0.997149 \n",
"0 0.997153 "
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Company 2: same target summary, read from Company_2/target_information.csv (see printed file path).\n",
"# The second argument (25) appears to cap the number of rows displayed; the saved run\n",
"# returned only 18 rows, i.e. fewer targets than the requested maximum.\n",
"print_main_target('2', 25)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "9a65991f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_3/target_information.csv\n",
"Nombre de ciblage : 1617362\n",
"Nombre de client avec étiquette target : 257018\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>MKG_NLmensuelle_2021_OK</td>\n",
" <td>0.972348</td>\n",
" <td>0.154518</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>MKG_NLmensuelle_2021</td>\n",
" <td>0.956007</td>\n",
" <td>0.306439</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>consent_optin_nl</td>\n",
" <td>0.636648</td>\n",
" <td>0.407609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>consent_optin_general</td>\n",
" <td>0.602506</td>\n",
" <td>0.503355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>Mkg_NL_mensuelle3</td>\n",
" <td>0.404162</td>\n",
" <td>0.567581</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>consent_optout_general</td>\n",
" <td>0.368126</td>\n",
" <td>0.626081</td>\n",
" </tr>\n",
" <tr>\n",
" <th>104</th>\n",
" <td>TEST LOCBASE</td>\n",
" <td>0.350532</td>\n",
" <td>0.681784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>MKG_Non_inscrit_liste_08-22</td>\n",
" <td>0.310605</td>\n",
" <td>0.731143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>consent_optin_general_HISTORIQUE</td>\n",
" <td>0.301345</td>\n",
" <td>0.779030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>Mkg_Zone_C</td>\n",
" <td>0.135298</td>\n",
" <td>0.800531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Acheteurs_100km_visite_depuismax5ans</td>\n",
" <td>0.091149</td>\n",
" <td>0.815015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Inscription NL ancien site web</td>\n",
" <td>0.083477</td>\n",
" <td>0.828281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>112</th>\n",
" <td>consent_optin_equestre</td>\n",
" <td>0.083216</td>\n",
" <td>0.841505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>Mkg_Zone_B</td>\n",
" <td>0.079889</td>\n",
" <td>0.854200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>MKG_2022_ZoneB&amp;ZoneC_Famille</td>\n",
" <td>0.072536</td>\n",
" <td>0.865727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>consent_optin_b2b</td>\n",
" <td>0.064388</td>\n",
" <td>0.875959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>Soft_Bounce_yahoo</td>\n",
" <td>0.064182</td>\n",
" <td>0.886158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>Scénario Anniversaire</td>\n",
" <td>0.051249</td>\n",
" <td>0.894303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>B2B_scolaire_et_centres_de_loisirs_2023</td>\n",
" <td>0.046732</td>\n",
" <td>0.901729</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>B2B_Sans étiquette</td>\n",
" <td>0.040472</td>\n",
" <td>0.908160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>122</th>\n",
" <td>consent_optout_equestre</td>\n",
" <td>0.038865</td>\n",
" <td>0.914336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>B2B_Inscrits newsletter Scolaires</td>\n",
" <td>0.038075</td>\n",
" <td>0.920387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>B2B_historique_newsletter_SCOLAIRES</td>\n",
" <td>0.038040</td>\n",
" <td>0.926432</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>consent_optin_jdp</td>\n",
" <td>0.036110</td>\n",
" <td>0.932170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>MKG_aire cantilienne</td>\n",
" <td>0.031908</td>\n",
" <td>0.937241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>B2B_Sans étiquette FR+BE</td>\n",
" <td>0.029165</td>\n",
" <td>0.941876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108</th>\n",
" <td>b2b - écoles mai 2021</td>\n",
" <td>0.028574</td>\n",
" <td>0.946416</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>Ouvreur_NL_juin_2021</td>\n",
" <td>0.018193</td>\n",
" <td>0.949308</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>consent_optout_jdp</td>\n",
" <td>0.016816</td>\n",
" <td>0.951980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>consent_optout_nl</td>\n",
" <td>0.016633</td>\n",
" <td>0.954623</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>B2B_CE_2023</td>\n",
" <td>0.016489</td>\n",
" <td>0.957243</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>Visiteurs Aout-Sept sans questionnaire</td>\n",
" <td>0.016275</td>\n",
" <td>0.959830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>Pass Annuel en cours de validité</td>\n",
" <td>0.011540</td>\n",
" <td>0.961663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>114</th>\n",
" <td>consent_optin_expositions</td>\n",
" <td>0.011388</td>\n",
" <td>0.963473</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>B2B_Sans étiquette hors FR+BE</td>\n",
" <td>0.011307</td>\n",
" <td>0.965270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>113</th>\n",
" <td>consent_optin_evenements</td>\n",
" <td>0.011240</td>\n",
" <td>0.967056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>B2B_liste_à_requalifier_CE</td>\n",
" <td>0.010742</td>\n",
" <td>0.968763</td>\n",
" </tr>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>consent_optin_abonnes_passannuels</td>\n",
" <td>0.009665</td>\n",
" <td>0.970299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>152</th>\n",
" <td>liste mécènes donateurs 01012023-31102023</td>\n",
" <td>0.008746</td>\n",
" <td>0.971689</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>B2B_liste_à_requalifier_SCOLAIRES</td>\n",
" <td>0.008688</td>\n",
" <td>0.973070</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_name customer_id \\\n",
"67 MKG_NLmensuelle_2021_OK 0.972348 \n",
"66 MKG_NLmensuelle_2021 0.956007 \n",
"119 consent_optin_nl 0.636648 \n",
"115 consent_optin_general 0.602506 \n",
"78 Mkg_NL_mensuelle3 0.404162 \n",
"125 consent_optout_general 0.368126 \n",
"104 TEST LOCBASE 0.350532 \n",
"68 MKG_Non_inscrit_liste_08-22 0.310605 \n",
"116 consent_optin_general_HISTORIQUE 0.301345 \n",
"80 Mkg_Zone_C 0.135298 \n",
"7 Acheteurs_100km_visite_depuismax5ans 0.091149 \n",
"54 Inscription NL ancien site web 0.083477 \n",
"112 consent_optin_equestre 0.083216 \n",
"79 Mkg_Zone_B 0.079889 \n",
"65 MKG_2022_ZoneB&ZoneC_Famille 0.072536 \n",
"111 consent_optin_b2b 0.064388 \n",
"102 Soft_Bounce_yahoo 0.064182 \n",
"100 Scénario Anniversaire 0.051249 \n",
"37 B2B_scolaire_et_centres_de_loisirs_2023 0.046732 \n",
"20 B2B_Sans étiquette 0.040472 \n",
"122 consent_optout_equestre 0.038865 \n",
"17 B2B_Inscrits newsletter Scolaires 0.038075 \n",
"28 B2B_historique_newsletter_SCOLAIRES 0.038040 \n",
"118 consent_optin_jdp 0.036110 \n",
"76 MKG_aire cantilienne 0.031908 \n",
"21 B2B_Sans étiquette FR+BE 0.029165 \n",
"108 b2b - écoles mai 2021 0.028574 \n",
"86 Ouvreur_NL_juin_2021 0.018193 \n",
"126 consent_optout_jdp 0.016816 \n",
"127 consent_optout_nl 0.016633 \n",
"13 B2B_CE_2023 0.016489 \n",
"106 Visiteurs Aout-Sept sans questionnaire 0.016275 \n",
"89 Pass Annuel en cours de validité 0.011540 \n",
"114 consent_optin_expositions 0.011388 \n",
"22 B2B_Sans étiquette hors FR+BE 0.011307 \n",
"113 consent_optin_evenements 0.011240 \n",
"32 B2B_liste_à_requalifier_CE 0.010742 \n",
"110 consent_optin_abonnes_passannuels 0.009665 \n",
"152 liste mécènes donateurs 01012023-31102023 0.008746 \n",
"34 B2B_liste_à_requalifier_SCOLAIRES 0.008688 \n",
"\n",
" cumulative_customers \n",
"67 0.154518 \n",
"66 0.306439 \n",
"119 0.407609 \n",
"115 0.503355 \n",
"78 0.567581 \n",
"125 0.626081 \n",
"104 0.681784 \n",
"68 0.731143 \n",
"116 0.779030 \n",
"80 0.800531 \n",
"7 0.815015 \n",
"54 0.828281 \n",
"112 0.841505 \n",
"79 0.854200 \n",
"65 0.865727 \n",
"111 0.875959 \n",
"102 0.886158 \n",
"100 0.894303 \n",
"37 0.901729 \n",
"20 0.908160 \n",
"122 0.914336 \n",
"17 0.920387 \n",
"28 0.926432 \n",
"118 0.932170 \n",
"76 0.937241 \n",
"21 0.941876 \n",
"108 0.946416 \n",
"86 0.949308 \n",
"126 0.951980 \n",
"127 0.954623 \n",
"13 0.957243 \n",
"106 0.959830 \n",
"89 0.961663 \n",
"114 0.963473 \n",
"22 0.965270 \n",
"113 0.967056 \n",
"32 0.968763 \n",
"110 0.970299 \n",
"152 0.971689 \n",
"34 0.973070 "
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Company 3: same target summary, read from Company_3/target_information.csv (see printed file path).\n",
"# Second argument set to 40; the saved run shows exactly 40 rows, so it presumably\n",
"# limits how many of the top targets are listed.\n",
"print_main_target('3', 40)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "c66a4dc1",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)\n"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "5f34b8bf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/target_information.csv\n",
"Nombre de ciblage : 4627640\n",
"Nombre de client avec étiquette target : 320813\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>232</th>\n",
" <td>Tous les contacts mis à jour</td>\n",
" <td>0.999991</td>\n",
" <td>0.069325</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>Base données</td>\n",
" <td>0.999991</td>\n",
" <td>0.138650</td>\n",
" </tr>\n",
" <tr>\n",
" <th>191</th>\n",
" <td>Office de Tourisme</td>\n",
" <td>0.999991</td>\n",
" <td>0.207974</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>Globale sans VIP</td>\n",
" <td>0.955488</td>\n",
" <td>0.274214</td>\n",
" </tr>\n",
" <tr>\n",
" <th>112</th>\n",
" <td>Contacts structures</td>\n",
" <td>0.929969</td>\n",
" <td>0.338684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>Cible gratuité IMA COMEDY</td>\n",
" <td>0.636246</td>\n",
" <td>0.382792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>2 IEME ENVOI IMA COMEDY CLUB</td>\n",
" <td>0.630389</td>\n",
" <td>0.426494</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>Actions Marketing ARABOFOLIES</td>\n",
" <td>0.627917</td>\n",
" <td>0.470025</td>\n",
" </tr>\n",
" <tr>\n",
" <th>171</th>\n",
" <td>Liste globale sans VIP</td>\n",
" <td>0.582183</td>\n",
" <td>0.510385</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>Formulaire inscription mallette \"Cultures en partage\"</td>\n",
" <td>0.532831</td>\n",
" <td>0.547324</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>CAMPAGNE ADHESION 2023</td>\n",
" <td>0.449371</td>\n",
" <td>0.578477</td>\n",
" </tr>\n",
" <tr>\n",
" <th>234</th>\n",
" <td>Tous les optins</td>\n",
" <td>0.412546</td>\n",
" <td>0.607076</td>\n",
" </tr>\n",
" <tr>\n",
" <th>192</th>\n",
" <td>Optin 2023</td>\n",
" <td>0.365057</td>\n",
" <td>0.632384</td>\n",
" </tr>\n",
" <tr>\n",
" <th>170</th>\n",
" <td>Liste globale optin-15-01-2021</td>\n",
" <td>0.325482</td>\n",
" <td>0.654948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10-03-sb-dolist</td>\n",
" <td>0.193833</td>\n",
" <td>0.668386</td>\n",
" </tr>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>Origine - Nouba</td>\n",
" <td>0.192452</td>\n",
" <td>0.681728</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>LIVE2022_Intérêt Expositions musée</td>\n",
" <td>0.173550</td>\n",
" <td>0.693759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>414</th>\n",
" <td>old_Intéressés par la Musique</td>\n",
" <td>0.166505</td>\n",
" <td>0.705302</td>\n",
" </tr>\n",
" <tr>\n",
" <th>415</th>\n",
" <td>old_Intérêt Danse</td>\n",
" <td>0.163572</td>\n",
" <td>0.716642</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>Cible offre DAOUD DEPARDON</td>\n",
" <td>0.130372</td>\n",
" <td>0.725680</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>13-04-2022-vente 2021</td>\n",
" <td>0.128804</td>\n",
" <td>0.734609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>Arabofolies Juillet 2022</td>\n",
" <td>0.109123</td>\n",
" <td>0.742174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>Intérêt LGBTQ+</td>\n",
" <td>0.108917</td>\n",
" <td>0.749725</td>\n",
" </tr>\n",
" <tr>\n",
" <th>274</th>\n",
" <td>blacklistés ima</td>\n",
" <td>0.105407</td>\n",
" <td>0.757032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>208</th>\n",
" <td>Public traditionnel</td>\n",
" <td>0.083821</td>\n",
" <td>0.762843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <td>LIVE2022_Intérêt Humour</td>\n",
" <td>0.082858</td>\n",
" <td>0.768587</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>Cible jeunes humour</td>\n",
" <td>0.080312</td>\n",
" <td>0.774155</td>\n",
" </tr>\n",
" <tr>\n",
" <th>213</th>\n",
" <td>Relance gratuité IMA COMEDY CLUB</td>\n",
" <td>0.079205</td>\n",
" <td>0.779646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101</th>\n",
" <td>Cible rencontres et débats</td>\n",
" <td>0.076743</td>\n",
" <td>0.784966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>189</th>\n",
" <td>Nouveaux inscrits newsletter</td>\n",
" <td>0.076200</td>\n",
" <td>0.790249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>349</th>\n",
" <td>interet nuit du cinéma</td>\n",
" <td>0.072260</td>\n",
" <td>0.795258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>141</th>\n",
" <td>Intérêt prononcé pour la nuit du ramadan</td>\n",
" <td>0.072254</td>\n",
" <td>0.800267</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>Cible Algérie</td>\n",
" <td>0.070337</td>\n",
" <td>0.805143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93</th>\n",
" <td>Cible News offre spéciale humour</td>\n",
" <td>0.069296</td>\n",
" <td>0.809947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Intérêt musique électro</td>\n",
" <td>0.068716</td>\n",
" <td>0.814711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>280</th>\n",
" <td>cible Histoire et feminisme</td>\n",
" <td>0.068585</td>\n",
" <td>0.819466</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174</th>\n",
" <td>Liste relais pour présentation 2023</td>\n",
" <td>0.067404</td>\n",
" <td>0.824139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>Intérêt musique Orientale</td>\n",
" <td>0.066082</td>\n",
" <td>0.828720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>16-07-21-nuit-du-cinema</td>\n",
" <td>0.065166</td>\n",
" <td>0.833237</td>\n",
" </tr>\n",
" <tr>\n",
" <th>204</th>\n",
" <td>Profil Client Expos Divas (Geo)</td>\n",
" <td>0.063401</td>\n",
" <td>0.837633</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>araborolies/Divas/relance</td>\n",
" <td>0.061647</td>\n",
" <td>0.841906</td>\n",
" </tr>\n",
" <tr>\n",
" <th>203</th>\n",
" <td>Profil Client Expo Divas</td>\n",
" <td>0.061138</td>\n",
" <td>0.846145</td>\n",
" </tr>\n",
" <tr>\n",
" <th>233</th>\n",
" <td>Tous les inscrits aux newsletters via le formulaire du site web</td>\n",
" <td>0.057407</td>\n",
" <td>0.850125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>243</th>\n",
" <td>VIP Générale</td>\n",
" <td>0.053682</td>\n",
" <td>0.853846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>226</th>\n",
" <td>Strcutures sans VIP</td>\n",
" <td>0.053396</td>\n",
" <td>0.857548</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>Agi pour buren</td>\n",
" <td>0.051575</td>\n",
" <td>0.861123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>Invitation à l'exposition Palestine LANG</td>\n",
" <td>0.051092</td>\n",
" <td>0.864665</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>Acheteurs individuels de l'expo Juifs d'orient statique</td>\n",
" <td>0.046526</td>\n",
" <td>0.867891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>Acheteurs individuels de l'expo Juifs d'orient</td>\n",
" <td>0.046513</td>\n",
" <td>0.871115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>Cible arabic Sound system</td>\n",
" <td>0.046164</td>\n",
" <td>0.874316</td>\n",
" </tr>\n",
" <tr>\n",
" <th>244</th>\n",
" <td>VIP STATIQUE</td>\n",
" <td>0.041158</td>\n",
" <td>0.877169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>245</th>\n",
" <td>VIP Téléchargement</td>\n",
" <td>0.040737</td>\n",
" <td>0.879993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>Cible scolaire 2022</td>\n",
" <td>0.040313</td>\n",
" <td>0.882788</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90</th>\n",
" <td>Cible Maroc</td>\n",
" <td>0.039827</td>\n",
" <td>0.885549</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91</th>\n",
" <td>Cible Maroc</td>\n",
" <td>0.039827</td>\n",
" <td>0.888310</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>26mai-2023-Structures-invit-palestine</td>\n",
" <td>0.039188</td>\n",
" <td>0.891027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>393</th>\n",
" <td>liste_contacts_agi_2021_02_16_</td>\n",
" <td>0.033618</td>\n",
" <td>0.893357</td>\n",
" </tr>\n",
" <tr>\n",
" <th>450</th>\n",
" <td>sb-fichier-eudonet-ok-18-05-21</td>\n",
" <td>0.032056</td>\n",
" <td>0.895579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>04_11_22_eudonet</td>\n",
" <td>0.031857</td>\n",
" <td>0.897788</td>\n",
" </tr>\n",
" <tr>\n",
" <th>215</th>\n",
" <td>SB-18-05-VIP-eudonet</td>\n",
" <td>0.031857</td>\n",
" <td>0.899996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>175</th>\n",
" <td>Liste vernissage</td>\n",
" <td>0.031364</td>\n",
" <td>0.902171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235</th>\n",
" <td>Tous les relais</td>\n",
" <td>0.031090</td>\n",
" <td>0.904326</td>\n",
" </tr>\n",
" <tr>\n",
" <th>252</th>\n",
" <td>Visiteurs expo pour questionnaires</td>\n",
" <td>0.029930</td>\n",
" <td>0.906401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>Scolaires - Actions Educatives 24/11/2021</td>\n",
" <td>0.029871</td>\n",
" <td>0.908472</td>\n",
" </tr>\n",
" <tr>\n",
" <th>92</th>\n",
" <td>Cible Musique Judeo-arabe</td>\n",
" <td>0.029266</td>\n",
" <td>0.910501</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>15-09-2023-Cible-Palestine</td>\n",
" <td>0.028531</td>\n",
" <td>0.912478</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>LIVE2022_Intérêts Rencontres, débats et conférences</td>\n",
" <td>0.026928</td>\n",
" <td>0.914345</td>\n",
" </tr>\n",
" <tr>\n",
" <th>282</th>\n",
" <td>cible photo</td>\n",
" <td>0.026056</td>\n",
" <td>0.916152</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>26-MAI_STRUCTURE-2023-OK</td>\n",
" <td>0.025495</td>\n",
" <td>0.917919</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>07-12-20-Relais-invitatation-divas</td>\n",
" <td>0.024909</td>\n",
" <td>0.919646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>410</th>\n",
" <td>old_Amis de l'IMA</td>\n",
" <td>0.023160</td>\n",
" <td>0.921251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>222</th>\n",
" <td>Scolaires - Actions Educatives 24/01/2023</td>\n",
" <td>0.022724</td>\n",
" <td>0.922827</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>PALESTINE</td>\n",
" <td>0.020903</td>\n",
" <td>0.924276</td>\n",
" </tr>\n",
" <tr>\n",
" <th>249</th>\n",
" <td>Vignes et tilleuls</td>\n",
" <td>0.020439</td>\n",
" <td>0.925693</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>26-mai-11H10-relais</td>\n",
" <td>0.019578</td>\n",
" <td>0.927050</td>\n",
" </tr>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>Contacts Librairie</td>\n",
" <td>0.019114</td>\n",
" <td>0.928375</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194</th>\n",
" <td>Origine - Inscription manuelle</td>\n",
" <td>0.018307</td>\n",
" <td>0.929644</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>Origine - QR code</td>\n",
" <td>0.018294</td>\n",
" <td>0.930913</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>Acheteurs Daoud Depardon</td>\n",
" <td>0.018232</td>\n",
" <td>0.932176</td>\n",
" </tr>\n",
" <tr>\n",
" <th>473</th>\n",
" <td>événements autour de Habibi</td>\n",
" <td>0.017755</td>\n",
" <td>0.933407</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_name \\\n",
"232 Tous les contacts mis à jour \n",
"76 Base données \n",
"191 Office de Tourisme \n",
"128 Globale sans VIP \n",
"112 Contacts structures \n",
"98 Cible gratuité IMA COMEDY \n",
"23 2 IEME ENVOI IMA COMEDY CLUB \n",
"64 Actions Marketing ARABOFOLIES \n",
"171 Liste globale sans VIP \n",
"126 Formulaire inscription mallette \"Cultures en partage\" \n",
"78 CAMPAGNE ADHESION 2023 \n",
"234 Tous les optins \n",
"192 Optin 2023 \n",
"170 Liste globale optin-15-01-2021 \n",
"10 10-03-sb-dolist \n",
"195 Origine - Nouba \n",
"158 LIVE2022_Intérêt Expositions musée \n",
"414 old_Intéressés par la Musique \n",
"415 old_Intérêt Danse \n",
"100 Cible offre DAOUD DEPARDON \n",
"12 13-04-2022-vente 2021 \n",
"73 Arabofolies Juillet 2022 \n",
"137 Intérêt LGBTQ+ \n",
"274 blacklistés ima \n",
"208 Public traditionnel \n",
"159 LIVE2022_Intérêt Humour \n",
"99 Cible jeunes humour \n",
"213 Relance gratuité IMA COMEDY CLUB \n",
"101 Cible rencontres et débats \n",
"189 Nouveaux inscrits newsletter \n",
"349 interet nuit du cinéma \n",
"141 Intérêt prononcé pour la nuit du ramadan \n",
"87 Cible Algérie \n",
"93 Cible News offre spéciale humour \n",
"140 Intérêt musique électro \n",
"280 cible Histoire et feminisme \n",
"174 Liste relais pour présentation 2023 \n",
"138 Intérêt musique Orientale \n",
"18 16-07-21-nuit-du-cinema \n",
"204 Profil Client Expos Divas (Geo) \n",
"265 araborolies/Divas/relance \n",
"203 Profil Client Expo Divas \n",
"233 Tous les inscrits aux newsletters via le formulaire du site web \n",
"243 VIP Générale \n",
"226 Strcutures sans VIP \n",
"67 Agi pour buren \n",
"144 Invitation à l'exposition Palestine LANG \n",
"62 Acheteurs individuels de l'expo Juifs d'orient statique \n",
"61 Acheteurs individuels de l'expo Juifs d'orient \n",
"95 Cible arabic Sound system \n",
"244 VIP STATIQUE \n",
"245 VIP Téléchargement \n",
"102 Cible scolaire 2022 \n",
"90 Cible Maroc \n",
"91 Cible Maroc \n",
"41 26mai-2023-Structures-invit-palestine \n",
"393 liste_contacts_agi_2021_02_16_ \n",
"450 sb-fichier-eudonet-ok-18-05-21 \n",
"4 04_11_22_eudonet \n",
"215 SB-18-05-VIP-eudonet \n",
"175 Liste vernissage \n",
"235 Tous les relais \n",
"252 Visiteurs expo pour questionnaires \n",
"223 Scolaires - Actions Educatives 24/11/2021 \n",
"92 Cible Musique Judeo-arabe \n",
"13 15-09-2023-Cible-Palestine \n",
"162 LIVE2022_Intérêts Rencontres, débats et conférences \n",
"282 cible photo \n",
"38 26-MAI_STRUCTURE-2023-OK \n",
"5 07-12-20-Relais-invitatation-divas \n",
"410 old_Amis de l'IMA \n",
"222 Scolaires - Actions Educatives 24/01/2023 \n",
"198 PALESTINE \n",
"249 Vignes et tilleuls \n",
"39 26-mai-11H10-relais \n",
"110 Contacts Librairie \n",
"194 Origine - Inscription manuelle \n",
"196 Origine - QR code \n",
"59 Acheteurs Daoud Depardon \n",
"473 événements autour de Habibi \n",
"\n",
" customer_id cumulative_customers \n",
"232 0.999991 0.069325 \n",
"76 0.999991 0.138650 \n",
"191 0.999991 0.207974 \n",
"128 0.955488 0.274214 \n",
"112 0.929969 0.338684 \n",
"98 0.636246 0.382792 \n",
"23 0.630389 0.426494 \n",
"64 0.627917 0.470025 \n",
"171 0.582183 0.510385 \n",
"126 0.532831 0.547324 \n",
"78 0.449371 0.578477 \n",
"234 0.412546 0.607076 \n",
"192 0.365057 0.632384 \n",
"170 0.325482 0.654948 \n",
"10 0.193833 0.668386 \n",
"195 0.192452 0.681728 \n",
"158 0.173550 0.693759 \n",
"414 0.166505 0.705302 \n",
"415 0.163572 0.716642 \n",
"100 0.130372 0.725680 \n",
"12 0.128804 0.734609 \n",
"73 0.109123 0.742174 \n",
"137 0.108917 0.749725 \n",
"274 0.105407 0.757032 \n",
"208 0.083821 0.762843 \n",
"159 0.082858 0.768587 \n",
"99 0.080312 0.774155 \n",
"213 0.079205 0.779646 \n",
"101 0.076743 0.784966 \n",
"189 0.076200 0.790249 \n",
"349 0.072260 0.795258 \n",
"141 0.072254 0.800267 \n",
"87 0.070337 0.805143 \n",
"93 0.069296 0.809947 \n",
"140 0.068716 0.814711 \n",
"280 0.068585 0.819466 \n",
"174 0.067404 0.824139 \n",
"138 0.066082 0.828720 \n",
"18 0.065166 0.833237 \n",
"204 0.063401 0.837633 \n",
"265 0.061647 0.841906 \n",
"203 0.061138 0.846145 \n",
"233 0.057407 0.850125 \n",
"243 0.053682 0.853846 \n",
"226 0.053396 0.857548 \n",
"67 0.051575 0.861123 \n",
"144 0.051092 0.864665 \n",
"62 0.046526 0.867891 \n",
"61 0.046513 0.871115 \n",
"95 0.046164 0.874316 \n",
"244 0.041158 0.877169 \n",
"245 0.040737 0.879993 \n",
"102 0.040313 0.882788 \n",
"90 0.039827 0.885549 \n",
"91 0.039827 0.888310 \n",
"41 0.039188 0.891027 \n",
"393 0.033618 0.893357 \n",
"450 0.032056 0.895579 \n",
"4 0.031857 0.897788 \n",
"215 0.031857 0.899996 \n",
"175 0.031364 0.902171 \n",
"235 0.031090 0.904326 \n",
"252 0.029930 0.906401 \n",
"223 0.029871 0.908472 \n",
"92 0.029266 0.910501 \n",
"13 0.028531 0.912478 \n",
"162 0.026928 0.914345 \n",
"282 0.026056 0.916152 \n",
"38 0.025495 0.917919 \n",
"5 0.024909 0.919646 \n",
"410 0.023160 0.921251 \n",
"222 0.022724 0.922827 \n",
"198 0.020903 0.924276 \n",
"249 0.020439 0.925693 \n",
"39 0.019578 0.927050 \n",
"110 0.019114 0.928375 \n",
"194 0.018307 0.929644 \n",
"196 0.018294 0.930913 \n",
"59 0.018232 0.932176 \n",
"473 0.017755 0.933407 "
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Company 4: same target summary, read from Company_4/target_information.csv (see printed file path).\n",
"# Second argument set to 80; the saved run shows exactly 80 rows, so it presumably\n",
"# limits how many of the top targets are listed.\n",
"print_main_target('4', 80)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "40fe3676",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_101/target_information.csv\n",
"Nombre de ciblage : 22054795\n",
"Nombre de client avec étiquette target : 2760649\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_name</th>\n",
" <th>customer_id</th>\n",
" <th>cumulative_customers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>304</th>\n",
" <td>acheteurs globaux sans gratuités</td>\n",
" <td>0.778807</td>\n",
" <td>0.097485</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>(Synchro Code Postal - Arenametrix)</td>\n",
" <td>0.622788</td>\n",
" <td>0.175441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>Acheteurs depuis 3 ans</td>\n",
" <td>0.531521</td>\n",
" <td>0.241973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>(Synchro Date de naissance - Arenametrix)</td>\n",
" <td>0.348903</td>\n",
" <td>0.285646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200</th>\n",
" <td>Consentements ALL</td>\n",
" <td>0.285676</td>\n",
" <td>0.321404</td>\n",
" </tr>\n",
" <tr>\n",
" <th>324</th>\n",
" <td>consentement optin culturespaces</td>\n",
" <td>0.263446</td>\n",
" <td>0.354381</td>\n",
" </tr>\n",
" <tr>\n",
" <th>303</th>\n",
" <td>[Auto] - Acheteurs depuis 1 an</td>\n",
" <td>0.229370</td>\n",
" <td>0.383091</td>\n",
" </tr>\n",
" <tr>\n",
" <th>334</th>\n",
" <td>consentement optout adl individuels</td>\n",
" <td>0.199532</td>\n",
" <td>0.408067</td>\n",
" </tr>\n",
" <tr>\n",
" <th>316</th>\n",
" <td>consentement optin adl individuels</td>\n",
" <td>0.180665</td>\n",
" <td>0.430682</td>\n",
" </tr>\n",
" <tr>\n",
" <th>270</th>\n",
" <td>Optins_ADL</td>\n",
" <td>0.174684</td>\n",
" <td>0.452547</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>ADL_acheteursADL_France</td>\n",
" <td>0.144103</td>\n",
" <td>0.470585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>ADL France Luxembourg __</td>\n",
" <td>0.138661</td>\n",
" <td>0.487941</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>ADL France opt-in</td>\n",
" <td>0.138359</td>\n",
" <td>0.505260</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>ADL optin 1x</td>\n",
" <td>0.137769</td>\n",
" <td>0.522505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>ADL envoi Cezanne</td>\n",
" <td>0.134868</td>\n",
" <td>0.539387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269</th>\n",
" <td>Nova_Global BDL</td>\n",
" <td>0.126884</td>\n",
" <td>0.555269</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>Analyse_ADL</td>\n",
" <td>0.114731</td>\n",
" <td>0.569630</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>Boutemont 200km</td>\n",
" <td>0.114117</td>\n",
" <td>0.583914</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>ADL_acheteursADL_France_ALL18mois</td>\n",
" <td>0.112623</td>\n",
" <td>0.598012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>Acheteurs BDL hors gratuité</td>\n",
" <td>0.107479</td>\n",
" <td>0.611465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>ADL clients Ile de France</td>\n",
" <td>0.101688</td>\n",
" <td>0.624194</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>ADL_acheteursADL_IDF</td>\n",
" <td>0.099636</td>\n",
" <td>0.636665</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>Blacklistés</td>\n",
" <td>0.095542</td>\n",
" <td>0.648624</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>ADL optin petit couronne</td>\n",
" <td>0.081259</td>\n",
" <td>0.658796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>ADL 15km</td>\n",
" <td>0.078260</td>\n",
" <td>0.668592</td>\n",
" </tr>\n",
" <tr>\n",
" <th>357</th>\n",
" <td>tarif famille</td>\n",
" <td>0.076298</td>\n",
" <td>0.678142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>ADL_acheteursADL_IDF_ALL18mois</td>\n",
" <td>0.076005</td>\n",
" <td>0.687656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>CAH MJA France-Belgique-Suisse-Lux</td>\n",
" <td>0.073566</td>\n",
" <td>0.696864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>336</th>\n",
" <td>consentement optout bdl individuels</td>\n",
" <td>0.070118</td>\n",
" <td>0.705641</td>\n",
" </tr>\n",
" <tr>\n",
" <th>351</th>\n",
" <td>destinataires_campagne_boutemont</td>\n",
" <td>0.067179</td>\n",
" <td>0.714050</td>\n",
" </tr>\n",
" <tr>\n",
" <th>340</th>\n",
" <td>consentement optout cdl individuels</td>\n",
" <td>0.067175</td>\n",
" <td>0.722458</td>\n",
" </tr>\n",
" <tr>\n",
" <th>347</th>\n",
" <td>consentement optout mja individuels</td>\n",
" <td>0.066109</td>\n",
" <td>0.730734</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>ADL_Acheteurs_Dali_depuis050521 -</td>\n",
" <td>0.065303</td>\n",
" <td>0.738908</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>ADL 1fois France hors IDF</td>\n",
" <td>0.063167</td>\n",
" <td>0.746814</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>ADL FR OPTIN emails ouverts</td>\n",
" <td>0.052960</td>\n",
" <td>0.753444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>BDL_anciensacheteursBDL_FRANCE_v2</td>\n",
" <td>0.051593</td>\n",
" <td>0.759902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>ADL 15km femme</td>\n",
" <td>0.051241</td>\n",
" <td>0.766316</td>\n",
" </tr>\n",
" <tr>\n",
" <th>205</th>\n",
" <td>Export 2 scénario Anniversaire</td>\n",
" <td>0.051113</td>\n",
" <td>0.772713</td>\n",
" </tr>\n",
" <tr>\n",
" <th>204</th>\n",
" <td>Export 1 scénario Anniversaire</td>\n",
" <td>0.050358</td>\n",
" <td>0.779017</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>Analyse_BDL</td>\n",
" <td>0.048561</td>\n",
" <td>0.785095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>ADL 1fois IDF hors Paris</td>\n",
" <td>0.042128</td>\n",
" <td>0.790369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>Analyse_MJA</td>\n",
" <td>0.041119</td>\n",
" <td>0.795516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>BDL_Anciens acheteurs Tintin_v2</td>\n",
" <td>0.039127</td>\n",
" <td>0.800413</td>\n",
" </tr>\n",
" <tr>\n",
" <th>91</th>\n",
" <td>BDL - Jumeaux - anciens acheteurs Tintin</td>\n",
" <td>0.038955</td>\n",
" <td>0.805289</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>ADL optin 2+</td>\n",
" <td>0.038283</td>\n",
" <td>0.810081</td>\n",
" </tr>\n",
" <tr>\n",
" <th>247</th>\n",
" <td>Liste boutemont</td>\n",
" <td>0.033839</td>\n",
" <td>0.814317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>ADL clients FR LUX fidélité +2</td>\n",
" <td>0.033168</td>\n",
" <td>0.818469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>ADL France 2-14 visites</td>\n",
" <td>0.033107</td>\n",
" <td>0.822613</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>ADL 1fois Paris</td>\n",
" <td>0.032402</td>\n",
" <td>0.826669</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>BDL_AcheteursMRC_depuis060521 - Old</td>\n",
" <td>0.032150</td>\n",
" <td>0.830693</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>BDL_AcheteursMRC_depuis060521 - 07/12/2021</td>\n",
" <td>0.032150</td>\n",
" <td>0.834717</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>BDL_anciensacheteursBDL_GIRONDE_v2</td>\n",
" <td>0.031237</td>\n",
" <td>0.838627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>Analyse_CDL</td>\n",
" <td>0.030649</td>\n",
" <td>0.842464</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>ADL -20km +30ans</td>\n",
" <td>0.027940</td>\n",
" <td>0.845961</td>\n",
" </tr>\n",
" <tr>\n",
" <th>322</th>\n",
" <td>consentement optin cdl individuels</td>\n",
" <td>0.027214</td>\n",
" <td>0.849368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>ADL ile de france fidélité+2</td>\n",
" <td>0.025974</td>\n",
" <td>0.852619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>Anniversaire</td>\n",
" <td>0.025558</td>\n",
" <td>0.855818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>ADL 30-50 ans FID 1</td>\n",
" <td>0.024735</td>\n",
" <td>0.858914</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>CAH_ADL</td>\n",
" <td>0.024249</td>\n",
" <td>0.861950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344</th>\n",
" <td>consentement optout hdc individuels</td>\n",
" <td>0.024003</td>\n",
" <td>0.864954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>314</th>\n",
" <td>consentement bdl individuels</td>\n",
" <td>0.023525</td>\n",
" <td>0.867899</td>\n",
" </tr>\n",
" <tr>\n",
" <th>318</th>\n",
" <td>consentement optin bdl individuels</td>\n",
" <td>0.023207</td>\n",
" <td>0.870804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>122</th>\n",
" <td>BDL_anciens acheteurs BDL</td>\n",
" <td>0.022925</td>\n",
" <td>0.873673</td>\n",
" </tr>\n",
" <tr>\n",
" <th>183</th>\n",
" <td>CDL_Tous Optins</td>\n",
" <td>0.022568</td>\n",
" <td>0.876498</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>CDL France optin</td>\n",
" <td>0.021642</td>\n",
" <td>0.879207</td>\n",
" </tr>\n",
" <tr>\n",
" <th>167</th>\n",
" <td>CDL Optin_France_Belgique_Luxembourg</td>\n",
" <td>0.021339</td>\n",
" <td>0.881878</td>\n",
" </tr>\n",
" <tr>\n",
" <th>175</th>\n",
" <td>CDL optin_France</td>\n",
" <td>0.020988</td>\n",
" <td>0.884505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>ADL IDF 30-55ans hors Chagall</td>\n",
" <td>0.020071</td>\n",
" <td>0.887018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>327</th>\n",
" <td>consentement optin hdc individuels</td>\n",
" <td>0.019767</td>\n",
" <td>0.889492</td>\n",
" </tr>\n",
" <tr>\n",
" <th>271</th>\n",
" <td>Optins_BDL</td>\n",
" <td>0.019633</td>\n",
" <td>0.891949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123</th>\n",
" <td>BDL_anciens acheteurs BDL FRANCE</td>\n",
" <td>0.019280</td>\n",
" <td>0.894363</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>BDL opt-in France</td>\n",
" <td>0.019050</td>\n",
" <td>0.896747</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>ADL - nouveaux visiteurs Tintin</td>\n",
" <td>0.018092</td>\n",
" <td>0.899012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>274</th>\n",
" <td>Optins_HDC</td>\n",
" <td>0.017418</td>\n",
" <td>0.901192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94</th>\n",
" <td>BDL Nouvelle-Aquitaine optin</td>\n",
" <td>0.017348</td>\n",
" <td>0.903364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>ADL tarif famille</td>\n",
" <td>0.017094</td>\n",
" <td>0.905503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>ADL Cosmos mai 2022</td>\n",
" <td>0.015652</td>\n",
" <td>0.907462</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172</th>\n",
" <td>CDL intégrales -150km hors édition 2023</td>\n",
" <td>0.014517</td>\n",
" <td>0.909280</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>ADL 2+ IDF hors Paris</td>\n",
" <td>0.014415</td>\n",
" <td>0.911084</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>BDL optin -60km</td>\n",
" <td>0.013695</td>\n",
" <td>0.912798</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>BDL_anciens acheteurs BDL GIRONDE</td>\n",
" <td>0.013515</td>\n",
" <td>0.914490</td>\n",
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>Mailing groupes ZAO</td>\n",
" <td>0.013343</td>\n",
" <td>0.916160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>191</th>\n",
" <td>Clients CDL -150km hors Intégrales 2022</td>\n",
" <td>0.013056</td>\n",
" <td>0.917794</td>\n",
" </tr>\n",
" <tr>\n",
" <th>227</th>\n",
" <td>HDC_CAH_zaowouki</td>\n",
" <td>0.012870</td>\n",
" <td>0.919405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>222</th>\n",
" <td>HDC france_optin</td>\n",
" <td>0.012835</td>\n",
" <td>0.921012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>ADL 2+ Paris</td>\n",
" <td>0.012601</td>\n",
" <td>0.922589</td>\n",
" </tr>\n",
" <tr>\n",
" <th>325</th>\n",
" <td>consentement optin fdl individuels</td>\n",
" <td>0.011658</td>\n",
" <td>0.924048</td>\n",
" </tr>\n",
" <tr>\n",
" <th>180</th>\n",
" <td>CDL sauf cezanne -100km</td>\n",
" <td>0.011013</td>\n",
" <td>0.925427</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>ADL 2+ France hors IDF</td>\n",
" <td>0.010999</td>\n",
" <td>0.926804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>213</th>\n",
" <td>HDC -40km</td>\n",
" <td>0.010833</td>\n",
" <td>0.928160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>Clients américains ADL optin</td>\n",
" <td>0.010710</td>\n",
" <td>0.929500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190</th>\n",
" <td>Clients ADL USA optin</td>\n",
" <td>0.010700</td>\n",
" <td>0.930840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>193</th>\n",
" <td>Clients HDC &gt;100km</td>\n",
" <td>0.010622</td>\n",
" <td>0.932169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>ADL_acheteursOcéans_ALL</td>\n",
" <td>0.010540</td>\n",
" <td>0.933489</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>ADL - Formulaire inscription NL grand public OU scolaires</td>\n",
" <td>0.010446</td>\n",
" <td>0.934796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>302</th>\n",
" <td>[AUTO] - Inactifs scénario parralèle</td>\n",
" <td>0.010419</td>\n",
" <td>0.936100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>Acheteurs Dali ADL 2021</td>\n",
" <td>0.009927</td>\n",
" <td>0.937343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>ADL IDF +55ans hors Chagall</td>\n",
" <td>0.009800</td>\n",
" <td>0.938570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>ADL FR visiteurs Dali</td>\n",
" <td>0.009441</td>\n",
" <td>0.939751</td>\n",
" </tr>\n",
" <tr>\n",
" <th>214</th>\n",
" <td>HDC &gt;100km</td>\n",
" <td>0.008993</td>\n",
" <td>0.940877</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_name customer_id \\\n",
"304 acheteurs globaux sans gratuités 0.778807 \n",
"1 (Synchro Code Postal - Arenametrix) 0.622788 \n",
"76 Acheteurs depuis 3 ans 0.531521 \n",
"2 (Synchro Date de naissance - Arenametrix) 0.348903 \n",
"200 Consentements ALL 0.285676 \n",
"324 consentement optin culturespaces 0.263446 \n",
"303 [Auto] - Acheteurs depuis 1 an 0.229370 \n",
"334 consentement optout adl individuels 0.199532 \n",
"316 consentement optin adl individuels 0.180665 \n",
"270 Optins_ADL 0.174684 \n",
"57 ADL_acheteursADL_France 0.144103 \n",
"25 ADL France Luxembourg __ 0.138661 \n",
"26 ADL France opt-in 0.138359 \n",
"43 ADL optin 1x 0.137769 \n",
"41 ADL envoi Cezanne 0.134868 \n",
"269 Nova_Global BDL 0.126884 \n",
"78 Analyse_ADL 0.114731 \n",
"140 Boutemont 200km 0.114117 \n",
"58 ADL_acheteursADL_France_ALL18mois 0.112623 \n",
"72 Acheteurs BDL hors gratuité 0.107479 \n",
"39 ADL clients Ile de France 0.101688 \n",
"59 ADL_acheteursADL_IDF 0.099636 \n",
"139 Blacklistés 0.095542 \n",
"45 ADL optin petit couronne 0.081259 \n",
"10 ADL 15km 0.078260 \n",
"357 tarif famille 0.076298 \n",
"60 ADL_acheteursADL_IDF_ALL18mois 0.076005 \n",
"149 CAH MJA France-Belgique-Suisse-Lux 0.073566 \n",
"336 consentement optout bdl individuels 0.070118 \n",
"351 destinataires_campagne_boutemont 0.067179 \n",
"340 consentement optout cdl individuels 0.067175 \n",
"347 consentement optout mja individuels 0.066109 \n",
"56 ADL_Acheteurs_Dali_depuis050521 - 0.065303 \n",
"12 ADL 1fois France hors IDF 0.063167 \n",
"22 ADL FR OPTIN emails ouverts 0.052960 \n",
"125 BDL_anciensacheteursBDL_FRANCE_v2 0.051593 \n",
"11 ADL 15km femme 0.051241 \n",
"205 Export 2 scénario Anniversaire 0.051113 \n",
"204 Export 1 scénario Anniversaire 0.050358 \n",
"79 Analyse_BDL 0.048561 \n",
"13 ADL 1fois IDF hors Paris 0.042128 \n",
"84 Analyse_MJA 0.041119 \n",
"118 BDL_Anciens acheteurs Tintin_v2 0.039127 \n",
"91 BDL - Jumeaux - anciens acheteurs Tintin 0.038955 \n",
"44 ADL optin 2+ 0.038283 \n",
"247 Liste boutemont 0.033839 \n",
"37 ADL clients FR LUX fidélité +2 0.033168 \n",
"24 ADL France 2-14 visites 0.033107 \n",
"14 ADL 1fois Paris 0.032402 \n",
"116 BDL_AcheteursMRC_depuis060521 - Old 0.032150 \n",
"115 BDL_AcheteursMRC_depuis060521 - 07/12/2021 0.032150 \n",
"126 BDL_anciensacheteursBDL_GIRONDE_v2 0.031237 \n",
"81 Analyse_CDL 0.030649 \n",
"9 ADL -20km +30ans 0.027940 \n",
"322 consentement optin cdl individuels 0.027214 \n",
"42 ADL ile de france fidélité+2 0.025974 \n",
"86 Anniversaire 0.025558 \n",
"18 ADL 30-50 ans FID 1 0.024735 \n",
"150 CAH_ADL 0.024249 \n",
"344 consentement optout hdc individuels 0.024003 \n",
"314 consentement bdl individuels 0.023525 \n",
"318 consentement optin bdl individuels 0.023207 \n",
"122 BDL_anciens acheteurs BDL 0.022925 \n",
"183 CDL_Tous Optins 0.022568 \n",
"166 CDL France optin 0.021642 \n",
"167 CDL Optin_France_Belgique_Luxembourg 0.021339 \n",
"175 CDL optin_France 0.020988 \n",
"29 ADL IDF 30-55ans hors Chagall 0.020071 \n",
"327 consentement optin hdc individuels 0.019767 \n",
"271 Optins_BDL 0.019633 \n",
"123 BDL_anciens acheteurs BDL FRANCE 0.019280 \n",
"97 BDL opt-in France 0.019050 \n",
"8 ADL - nouveaux visiteurs Tintin 0.018092 \n",
"274 Optins_HDC 0.017418 \n",
"94 BDL Nouvelle-Aquitaine optin 0.017348 \n",
"49 ADL tarif famille 0.017094 \n",
"21 ADL Cosmos mai 2022 0.015652 \n",
"172 CDL intégrales -150km hors édition 2023 0.014517 \n",
"16 ADL 2+ IDF hors Paris 0.014415 \n",
"98 BDL optin -60km 0.013695 \n",
"124 BDL_anciens acheteurs BDL GIRONDE 0.013515 \n",
"264 Mailing groupes ZAO 0.013343 \n",
"191 Clients CDL -150km hors Intégrales 2022 0.013056 \n",
"227 HDC_CAH_zaowouki 0.012870 \n",
"222 HDC france_optin 0.012835 \n",
"17 ADL 2+ Paris 0.012601 \n",
"325 consentement optin fdl individuels 0.011658 \n",
"180 CDL sauf cezanne -100km 0.011013 \n",
"15 ADL 2+ France hors IDF 0.010999 \n",
"213 HDC -40km 0.010833 \n",
"199 Clients américains ADL optin 0.010710 \n",
"190 Clients ADL USA optin 0.010700 \n",
"193 Clients HDC >100km 0.010622 \n",
"65 ADL_acheteursOcéans_ALL 0.010540 \n",
"5 ADL - Formulaire inscription NL grand public OU scolaires 0.010446 \n",
"302 [AUTO] - Inactifs scénario parralèle 0.010419 \n",
"74 Acheteurs Dali ADL 2021 0.009927 \n",
"27 ADL IDF +55ans hors Chagall 0.009800 \n",
"23 ADL FR visiteurs Dali 0.009441 \n",
"214 HDC >100km 0.008993 \n",
"\n",
" cumulative_customers \n",
"304 0.097485 \n",
"1 0.175441 \n",
"76 0.241973 \n",
"2 0.285646 \n",
"200 0.321404 \n",
"324 0.354381 \n",
"303 0.383091 \n",
"334 0.408067 \n",
"316 0.430682 \n",
"270 0.452547 \n",
"57 0.470585 \n",
"25 0.487941 \n",
"26 0.505260 \n",
"43 0.522505 \n",
"41 0.539387 \n",
"269 0.555269 \n",
"78 0.569630 \n",
"140 0.583914 \n",
"58 0.598012 \n",
"72 0.611465 \n",
"39 0.624194 \n",
"59 0.636665 \n",
"139 0.648624 \n",
"45 0.658796 \n",
"10 0.668592 \n",
"357 0.678142 \n",
"60 0.687656 \n",
"149 0.696864 \n",
"336 0.705641 \n",
"351 0.714050 \n",
"340 0.722458 \n",
"347 0.730734 \n",
"56 0.738908 \n",
"12 0.746814 \n",
"22 0.753444 \n",
"125 0.759902 \n",
"11 0.766316 \n",
"205 0.772713 \n",
"204 0.779017 \n",
"79 0.785095 \n",
"13 0.790369 \n",
"84 0.795516 \n",
"118 0.800413 \n",
"91 0.805289 \n",
"44 0.810081 \n",
"247 0.814317 \n",
"37 0.818469 \n",
"24 0.822613 \n",
"14 0.826669 \n",
"116 0.830693 \n",
"115 0.834717 \n",
"126 0.838627 \n",
"81 0.842464 \n",
"9 0.845961 \n",
"322 0.849368 \n",
"42 0.852619 \n",
"86 0.855818 \n",
"18 0.858914 \n",
"150 0.861950 \n",
"344 0.864954 \n",
"314 0.867899 \n",
"318 0.870804 \n",
"122 0.873673 \n",
"183 0.876498 \n",
"166 0.879207 \n",
"167 0.881878 \n",
"175 0.884505 \n",
"29 0.887018 \n",
"327 0.889492 \n",
"271 0.891949 \n",
"123 0.894363 \n",
"97 0.896747 \n",
"8 0.899012 \n",
"274 0.901192 \n",
"94 0.903364 \n",
"49 0.905503 \n",
"21 0.907462 \n",
"172 0.909280 \n",
"16 0.911084 \n",
"98 0.912798 \n",
"124 0.914490 \n",
"264 0.916160 \n",
"191 0.917794 \n",
"227 0.919405 \n",
"222 0.921012 \n",
"17 0.922589 \n",
"325 0.924048 \n",
"180 0.925427 \n",
"15 0.926804 \n",
"213 0.928160 \n",
"199 0.929500 \n",
"190 0.930840 \n",
"193 0.932169 \n",
"65 0.933489 \n",
"5 0.934796 \n",
"302 0.936100 \n",
"74 0.937343 \n",
"27 0.938570 \n",
"23 0.939751 \n",
"214 0.940877 "
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print_main_target('101', 100)"
]
},
{
"cell_type": "markdown",
"id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901",
"metadata": {},
"source": [
"# Fin travail 25/02"
]
},
{
"cell_type": "markdown",
"id": "c437eaec",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Exemple sur Company 1"
]
},
{
"cell_type": "markdown",
"id": "a1c1fc39",
"metadata": {},
"source": [
"## Chargement données"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "66f8c17b",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"bdc2324-data/1\"\n",
"liste_database = fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c08e6798",
"metadata": {},
"outputs": [],
"source": [
"liste_database_select = ['suppliers', 'ticket', 'purchase', 'consumption', 'type_ofs']\n",
"\n",
"# Keep only the bucket entries whose path contains at least one of the selected table names\n",
"liste_database_filtered = [\n",
"    path\n",
"    for path in liste_database\n",
"    if any(keyword in path for keyword in liste_database_select)\n",
"]\n",
"\n",
"# Show the result\n",
"print(liste_database_filtered)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "675f518d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_15285/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
],
"source": [
"# Load every CSV listed in the bucket into a module-level DataFrame named df<client>_<table>\n",
"files_path = liste_database\n",
"\n",
"# Paths are built as <bucket>/<client>/<client><table>.csv: the second segment is the client number\n",
"client_number = files_path[0].split(\"/\")[1]\n",
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
"\n",
"for current_path in files_path:\n",
"    # the pattern of the name is df1xxx\n",
"    name_match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path)\n",
"    if name_match is None:\n",
"        # Entry that is not a <digits><table>.csv object: skip it instead of failing on .group()\n",
"        continue\n",
"    with fs.open(current_path, mode=\"rb\") as file_in:\n",
"        # low_memory=False reads the file in a single pass, so a column cannot end up with mixed inferred types\n",
"        df = pd.read_csv(file_in, low_memory=False)\n",
"    nom_dataframe = df_prefix + name_match.group(3)\n",
"    # Later cells refer to these frames by name (df1_tickets, df1_purchases, ...)\n",
"    globals()[nom_dataframe] = df"
]
},
{
"cell_type": "markdown",
"id": "e855f403",
"metadata": {},
"source": [
"## customersplus.csv"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "91a8f8c4",
"metadata": {},
"outputs": [],
"source": [
"# DataFrame.info() prints its report and returns None, so it cannot be wrapped in pd.DataFrame:\n",
"# print the report, then build the per-column summary table explicitly.\n",
"df1_customersplus.info()\n",
"a = pd.DataFrame({\n",
"    'dtype': df1_customersplus.dtypes.astype(str),\n",
"    'non_null': df1_customersplus.count(),\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "2fda171d",
"metadata": {},
"outputs": [],
"source": [
"def info_colonnes_dataframe(df):\n",
"    \"\"\"\n",
"    Summarise every column of a DataFrame.\n",
"\n",
"    Parameters:\n",
"    - df: DataFrame\n",
"        The DataFrame to describe.\n",
"\n",
"    Returns:\n",
"    - DataFrame\n",
"        One row per column of df, with 'Nom_colonne' (column name),\n",
"        'Type_colonne' (dtype as a string) and 'Taux_NA' (percentage of missing values).\n",
"    \"\"\"\n",
"    # One record per column; the NA rate is the mean of the isna() mask, scaled to a percentage\n",
"    records = [\n",
"        {\n",
"            'Nom_colonne': name,\n",
"            'Type_colonne': str(values.dtype),\n",
"            'Taux_NA': values.isna().mean() * 100,\n",
"        }\n",
"        for name, values in df.items()\n",
"    ]\n",
"    return pd.DataFrame(records)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "205eeeab",
"metadata": {},
"outputs": [],
"source": [
"def cleaning_date(df, column_name):\n",
"    \"\"\"\n",
"    Convert one column of a DataFrame to UTC datetimes, parsing the values with the ISO8601 format.\n",
"\n",
"    The converted values are written back into df itself.\n",
"\n",
"    Parameters:\n",
"    - df: DataFrame\n",
"        The DataFrame holding the column to clean.\n",
"    - column_name: str\n",
"        Name of the column to clean.\n",
"\n",
"    Returns:\n",
"    - DataFrame\n",
"        The same DataFrame, with the column converted.\n",
"    \"\"\"\n",
"    parsed_dates = pd.to_datetime(df[column_name], format='ISO8601', utc=True)\n",
"    df[column_name] = parsed_dates\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "634282c5",
"metadata": {},
"outputs": [],
"source": [
"a = info_colonnes_dataframe(df1_customersplus)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "0e8d4133",
"metadata": {},
"outputs": [],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "1268ad5a",
"metadata": {},
"outputs": [],
"source": [
"a = pd.DataFrame(df1_customersplus.isna().sum()/len(df1_customersplus)*100)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "bd41dc80",
"metadata": {},
"outputs": [],
"source": [
"# Variable selection, done on a copy of df1_customersplus\n",
"df1_customersplus_clean = df1_customersplus.copy()\n",
"\n",
"# Parse the date columns (cleaning_date writes the converted column back into the frame it receives)\n",
"# NOTE(review): last_visiting_date is parsed here and dropped just below - confirm whether it should be kept\n",
"cleaning_date(df1_customersplus_clean, 'first_buying_date')\n",
"cleaning_date(df1_customersplus_clean, 'last_visiting_date')\n",
"\n",
"# Drop the columns that are not kept, then expose the id column as customer_id\n",
"df1_customersplus_clean = (\n",
"    df1_customersplus_clean\n",
"    .drop(columns=['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'])\n",
"    .rename(columns={'id': 'customer_id'})\n",
")"
]
},
{
"cell_type": "markdown",
"id": "64d0f76b",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## tickets.csv"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7e683711",
"metadata": {},
"outputs": [],
"source": [
"df1_tickets"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e7b9a52e",
"metadata": {},
"outputs": [],
"source": [
"df1_tickets.info()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "568280e8",
"metadata": {},
"outputs": [],
"source": [
"df1_tickets.isna().sum()/len(df1_tickets)*100"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "29ecec90",
"metadata": {},
"outputs": [],
"source": [
"# Variable selection\n",
"# DataFrame.drop(..., inplace=True) returns None, so the cleaned frame has to come from the\n",
"# non-inplace call; this also leaves df1_tickets itself unchanged.\n",
"# NOTE(review): the dropped columns and the id -> customer_id rename look copied from the\n",
"# customers cleaning cell - confirm they are the intended ones for the tickets table.\n",
"df1_tickets_clean = (\n",
"    df1_tickets\n",
"    .drop(columns=['lastname', 'firstname', 'email', 'created_at', 'updated_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode'])\n",
"    .rename(columns={'id': 'customer_id'})\n",
")"
]
},
{
"cell_type": "markdown",
"id": "22bb5de4",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## suppliers.csv"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "6a9a91f4",
"metadata": {},
"outputs": [],
"source": [
"df1_suppliers"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "bab4758a",
"metadata": {},
"outputs": [],
"source": [
"df1_suppliers.info()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b5fff251",
"metadata": {},
"outputs": [],
"source": [
"df1_suppliers.isna().sum()/len(df1_suppliers)*100"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8b09e2a3",
"metadata": {},
"outputs": [],
"source": [
"# Variable selection\n",
"# Rename through the returned frame rather than inplace=True: an in-place rename on a\n",
"# column subset raises pandas' SettingWithCopyWarning.\n",
"df1_suppliers_clean = df1_suppliers[['id', 'name']].rename(columns={'name': 'supplier_name'})"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ecee7cdc",
"metadata": {},
"outputs": [],
"source": [
"df1_suppliers_clean"
]
},
{
"cell_type": "markdown",
"id": "c8e6e69b",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## type_ofs.csv"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "1a6cff1f",
"metadata": {},
"outputs": [],
"source": [
"df1_type_ofs"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "93630b41",
"metadata": {},
"outputs": [],
"source": [
"df1_type_ofs.info()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "4f94481a",
"metadata": {},
"outputs": [],
"source": [
"# Variable selection\n",
"# Rename through the returned frame rather than inplace=True: an in-place rename on a\n",
"# column subset raises pandas' SettingWithCopyWarning.\n",
"df1_type_ofs_clean = df1_type_ofs[['id', 'name', 'children']].rename(columns={'name': 'type_of_ticket_name'})"
]
},
{
"cell_type": "markdown",
"id": "1b2811e2",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## purchases.csv"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "2455d2e1",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df1_purchases"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "5f9a159d",
"metadata": {},
"outputs": [],
"source": [
"df1_purchases.info()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "db201bf7",
"metadata": {},
"outputs": [],
"source": [
"# Clean purchase_date: a single parse with the ISO8601 format, converted to UTC\n",
"# (same arguments as cleaning_date). A second to_datetime call on the already converted\n",
"# column would not apply the format any more.\n",
"df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], utc=True, format='ISO8601')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "bd436fca",
"metadata": {},
"outputs": [],
"source": [
"df1_purchases.info()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "83435862",
"metadata": {},
"outputs": [],
"source": [
"# Selection des variables\n",
"df1_purchases_clean = df1_purchases[['id', 'purchase_date', 'customer_id']]"
]
},
{
"cell_type": "markdown",
"id": "f210e730",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Fusion de l'ensemble des données billétiques"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "1f8b3aa7",
"metadata": {},
"outputs": [],
"source": [
"# Start from the cleaned tickets and attach supplier, ticket type and purchase information in turn.\n",
"# After each inner join, the left join key and the right-hand id column are dropped.\n",
"df1_ticket_information = (\n",
"    df1_tickets_clean\n",
"    # Merge with suppliers\n",
"    .merge(df1_suppliers_clean, left_on='supplier_id', right_on='id', how='inner')\n",
"    .drop(columns=['supplier_id', 'id'])\n",
"    # Merge with ticket types\n",
"    .merge(df1_type_ofs_clean, left_on='type_of', right_on='id', how='inner')\n",
"    .drop(columns=['type_of', 'id'])\n",
"    # Merge with purchases\n",
"    .merge(df1_purchases_clean, left_on='purchase_id', right_on='id', how='inner')\n",
"    .drop(columns=['purchase_id', 'id'])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "83a4d021",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df1_ticket_information"
]
},
{
"cell_type": "markdown",
"id": "56e6ebd1",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Utilisation de fonctions"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "88fcde4b",
"metadata": {},
"outputs": [],
"source": [
"# Build a small example DataFrame; the explicit copy gives cleaning_date an independent frame\n",
"# to modify instead of a slice of df1_campaign_stats\n",
"df_not_clean = df1_campaign_stats[['opened_at']].head(20).copy()\n",
"\n",
"# Apply the function to clean the 'opened_at' column in a vectorised way\n",
"# (cleaning_date returns the frame it was given, so df_clean and df_not_clean are the same object)\n",
"df_clean = cleaning_date(df_not_clean, 'opened_at')\n",
"df_clean.rename(columns = {'opened_at' : 'opened_at_clean'}, inplace = True)\n",
"\n",
"# Raw and cleaned values side by side\n",
"test = pd.concat([df1_campaign_stats[['opened_at']].head(20), df_clean], axis=1)\n",
"\n",
"test.info()"
]
},
{
"cell_type": "markdown",
"id": "818f69db",
"metadata": {},
"source": [
"## Nettoyage, sélection et fusion"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "c9654eda",
"metadata": {},
"outputs": [],
"source": [
"df1_ticket_information"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7f2b620c",
"metadata": {},
"outputs": [],
"source": [
"df1_ticket_information.info()"
]
},
{
"cell_type": "markdown",
"id": "637bdb72",
"metadata": {},
"source": [
"# Customer information"
]
},
{
"cell_type": "markdown",
"id": "14c52894",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Target area"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d83abfbf",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_15285/2625134041.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
]
}
],
"source": [
"# Target.csv cleaning\n",
"# Rename through the returned frame rather than inplace=True: an in-place rename on a\n",
"# column subset raises pandas' SettingWithCopyWarning.\n",
"df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]].rename(columns={'id': 'target_id', 'name': 'target_name'})\n",
"\n",
"# target_type cleaning: every kept column gets the target_type_ prefix\n",
"df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
"\n",
"# customer_target_mappings cleaning\n",
"df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
"\n",
"# Merge target and target_type, then drop the join key\n",
"df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, on='target_type_id', how='inner')\n",
"df1_targets_full = df1_targets_full.drop(columns=['target_type_id'])\n",
"\n",
"# Merge with the customer/target mapping, then drop the join key\n",
"df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, on='target_id', how='inner')\n",
"df1_targets_full = df1_targets_full.drop(columns=['target_id'])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "90d71b2c",
"metadata": {},
"outputs": [],
"source": [
"# Number of target mappings attached to each customer\n",
"df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n",
"\n",
"# Share of targeted customers that appear in more than one target.\n",
"# Printed explicitly: a notebook only displays the last expression of a cell.\n",
"print(len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test))\n",
"\n",
"# 99.6% of the 151,000 targeted customers are categorised several times, about 5 times on average...\n",
"df1_targets_test.mean()\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2301de1e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1184824</td>\n",
" <td>645400</td>\n",
" <td>DDCP PROMO Réseau livres</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>210571</td>\n",
" <td>2412</td>\n",
" <td>DDCP PROMO Réseau livres</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>210572</td>\n",
" <td>4536</td>\n",
" <td>DDCP PROMO Réseau livres</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>210573</td>\n",
" <td>6736</td>\n",
" <td>DDCP PROMO Réseau livres</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>210574</td>\n",
" <td>38210</td>\n",
" <td>DDCP PROMO Réseau livres</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1184824 645400 DDCP PROMO Réseau livres False \n",
"1 210571 2412 DDCP PROMO Réseau livres False \n",
"2 210572 4536 DDCP PROMO Réseau livres False \n",
"3 210573 6736 DDCP PROMO Réseau livres False \n",
"4 210574 38210 DDCP PROMO Réseau livres False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the merged customer/target table\n",
"df1_targets_full.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "75fbc2f7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n",
"[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
"[nltk_data] Package wordnet is already up-to-date!\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Catégorisation des target_name\n",
"import pandas as pd\n",
"import nltk\n",
"from nltk.tokenize import word_tokenize\n",
"from nltk.corpus import stopwords\n",
"from nltk.stem import WordNetLemmatizer\n",
"from nltk.probability import FreqDist\n",
"\n",
"# Téléchargement des ressources nécessaires\n",
"nltk.download('punkt')\n",
"nltk.download('stopwords')\n",
"nltk.download('wordnet')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "55cddf92",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mots les plus fréquents:\n",
"consentement: 550777\n",
"optin: 463579\n",
"jeune: 155103\n",
"public: 155103\n",
"mediation: 150001\n"
]
}
],
"source": [
"# French stop words and the lemmatiser are built once: they do not depend on the text and\n",
"# preprocess_text is applied to every row of the table below.\n",
"_FRENCH_STOP_WORDS = set(stopwords.words('french'))\n",
"_LEMMATIZER = WordNetLemmatizer()\n",
"\n",
"\n",
"def preprocess_text(texte):\n",
"    \"\"\"Tokenise a text, remove French stop words and lemmatise what is left.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    texte : str or iterable of str\n",
"        A string is processed as is. Any other iterable of strings is first joined\n",
"        with single spaces. None and NaN yield an empty list.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        Tokens of the lower-cased text that are not French stop words, after lemmatisation.\n",
"    \"\"\"\n",
"    # Missing values (None, float NaN) carry no text\n",
"    if texte is None or (isinstance(texte, float) and texte != texte):\n",
"        return []\n",
"\n",
"    # ' '.join() must not be applied to a plain string: it iterates over the characters and\n",
"    # puts a space between each of them, so the tokeniser would only see single letters.\n",
"    if isinstance(texte, str):\n",
"        texte_concat = texte\n",
"    else:\n",
"        texte_concat = ' '.join(texte)\n",
"\n",
"    # NOTE(review): word_tokenize runs with its default language setting - confirm whether\n",
"    # language='french' is wanted for these labels.\n",
"    tokens = word_tokenize(texte_concat.lower())\n",
"    filtered_tokens = [word for word in tokens if word not in _FRENCH_STOP_WORDS]\n",
"\n",
"    # NOTE(review): WordNetLemmatizer relies on WordNet; check that it is appropriate for\n",
"    # French tokens.\n",
"    return [_LEMMATIZER.lemmatize(word) for word in filtered_tokens]\n",
"\n",
"\n",
"# Tokenise every target name\n",
"df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n",
"\n",
"# Flatten the per-row token lists into a single list covering the whole corpus\n",
"all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n",
"\n",
"# Word frequencies over the corpus\n",
"freq_dist = FreqDist(all_words)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7fd98a85",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mots les plus fréquents:\n",
"consentement: 550777\n",
"optin: 463579\n",
"jeune: 155103\n",
"public: 155103\n",
"mediation: 150001\n",
"specialisee: 150001\n",
"b2c: 143432\n",
"optout: 97683\n",
"newsletter: 56022\n",
"(: 46084\n",
"): 46084\n",
"inscrits: 42296\n",
"nl: 42294\n",
"générale: 41037\n",
"generale: 40950\n"
]
}
],
"source": [
"# Affichage des mots les plus fréquents\n",
"print(\"Mots les plus fréquents:\")\n",
"for mot, freq in freq_dist.most_common(15):\n",
" print(f\"{mot}: {freq}\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "cf94bb1d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" texte \\\n",
"0 Le chat noir mange une souris. \n",
"1 Le chien blanc aboie. \n",
"\n",
" texte_preprocessed \n",
"0 [e, h, a, o, i, r, a, g, e, u, e, o, u, r, i, .] \n",
"1 [e, h, i, e, b, a, a, b, o, i, e, .] \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n",
"[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n",
"[nltk_data] Package wordnet is already up-to-date!\n"
]
}
],
"source": [
"# Sanity check of preprocess_text on two short French sentences.\n",
"# The function, the nltk imports and the nltk downloads come from the cells above; this cell\n",
"# used to repeat all three, and its second `def preprocess_text` silently shadowed the first.\n",
"data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n",
"df = pd.DataFrame(data)\n",
"\n",
"# Each sentence is handed to preprocess_text as a plain string\n",
"df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n",
"\n",
"# Show the result\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"id": "711d3884",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Campaign area"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "c25b5295",
"metadata": {},
"outputs": [],
"source": [
"# campaign_stats: keep the identifiers and the three event timestamps.\n",
"# .copy() makes this an independent frame: cleaning_date is called for its effect on the frame\n",
"# it receives (its return value is not used here), so it should not be handed a column\n",
"# selection of df1_campaign_stats.\n",
"df1_campaign_stats_clean = df1_campaign_stats[\n",
"    [\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]\n",
"].copy()\n",
"for date_column in ('opened_at', 'sent_at', 'delivered_at'):\n",
"    cleaning_date(df1_campaign_stats_clean, date_column)\n",
"\n",
"# campaigns: every kept column gets the \"campaign_\" prefix (id -> campaign_id)\n",
"df1_campaigns_clean = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
"cleaning_date(df1_campaigns_clean, 'campaign_sent_at')\n",
"\n",
"# Attach the campaign attributes to each stat row (left join keeps every stat row),\n",
"# then drop the join key\n",
"df1_campaigns_full = (\n",
"    pd.merge(df1_campaign_stats_clean, df1_campaigns_clean, on = \"campaign_id\", how = \"left\")\n",
"    .drop(columns=['campaign_id'])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "2a3de6a5",
"metadata": {},
"outputs": [],
"source": [
"# Column dtypes and non-null counts of the merged campaign table\n",
"df1_campaigns_full.info()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "3fc1f446",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): df1_campaigns_information is not assigned in the nearby cells - confirm it is\n",
"# defined earlier in the notebook on a fresh kernel (df1_campaigns_full may have been intended).\n",
"df1_campaigns_information"
]
},
{
"cell_type": "markdown",
"id": "20e69ee3",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Link area"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "d9cbdbce",
"metadata": {},
"outputs": [],
"source": [
"# Inspect the campaigns table as loaded (before the column selection done in the campaign area)\n",
"df1_campaigns"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "c07459f0",
"metadata": {},
"outputs": [],
"source": [
"# Inspect the link_stats table\n",
"df1_link_stats"
]
},
{
"cell_type": "markdown",
"id": "80ae4c42",
"metadata": {},
"source": [
"## Exploration variables"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b50b8f95",
"metadata": {},
"outputs": [],
"source": [
"def suppliers_exploration(suppliers = None):\n",
"    \"\"\"Summarise a suppliers table as a one-row DataFrame.\n",
"\n",
"    Exploration helper for suppliers.csv, whose label, itr and commission\n",
"    columns are unknown quantities.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    suppliers : pandas.DataFrame\n",
"        Table with at least the columns 'name', 'label', 'itr' and 'commission'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row with 'nb_suppliers' (number of distinct names) and the percentage\n",
"        of NaN values of each of the three columns ('label_na', 'itr_na',\n",
"        'commission_na').\n",
"    \"\"\"\n",
"    n_rows = len(suppliers)\n",
"\n",
"    # Percentage of missing values, column by column\n",
"    na_rates = {\n",
"        column + '_na': [suppliers[column].isna().sum() / n_rows * 100]\n",
"        for column in ('label', 'itr', 'commission')\n",
"    }\n",
"\n",
"    summary = {'nb_suppliers': [suppliers['name'].nunique()]}\n",
"    summary.update(na_rates)\n",
"    return pd.DataFrame(summary)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7e292935",
"metadata": {},
"outputs": [],
"source": [
"# One-row summary (distinct supplier names and NaN rates) of df1_suppliers\n",
"df1_suppliers_desc = suppliers_exploration(suppliers = df1_suppliers)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "05b6f2b0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_suppliers</th>\n",
" <th>label_na</th>\n",
" <th>itr_na</th>\n",
" <th>commission_na</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9</td>\n",
" <td>100.0</td>\n",
" <td>100.0</td>\n",
" <td>100.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nb_suppliers label_na itr_na commission_na\n",
"0 9 100.0 100.0 100.0"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the suppliers summary\n",
"df1_suppliers_desc"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c9324d80",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"bdc2324-data\"\n",
"\n",
"# List the entries at the bucket root, then gather the content of each of them in one flat list\n",
"liste_folders = fs.ls(BUCKET)\n",
"liste_files = [\n",
"    file_path\n",
"    for company_folder in liste_folders\n",
"    for file_path in fs.ls(company_folder)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "10304058",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['bdc2324-data/1/1suppliers.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/9/9suppliers.csv']\n"
]
}
],
"source": [
"liste_database_select = ['suppliers']\n",
"\n",
"# Keep the paths that contain at least one of the selected database names\n",
"liste_suppliers = list(filter(\n",
"    lambda path: any(database in path for database in liste_database_select),\n",
"    liste_files,\n",
"))\n",
"\n",
"# Show the result\n",
"print(liste_suppliers)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "ffa423e5",
"metadata": {},
"outputs": [],
"source": [
"def database_loading(database_name = None):\n",
"    \"\"\"Read one CSV file from the S3 filesystem `fs` into a DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    database_name : str\n",
"        Path of the file, e.g. 'bdc2324-data/1/1suppliers.csv'. The second\n",
"        '/'-separated component of the path is taken as the client number.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (df, client_number): the loaded DataFrame and the client number as a string.\n",
"    \"\"\"\n",
"    client_number = database_name.split(\"/\")[1]\n",
"\n",
"    with fs.open(database_name, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in)\n",
"\n",
"    return df, client_number"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70bdc88d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 45,
"id": "6a0f567d",
"metadata": {},
"outputs": [],
"source": [
"# Load every suppliers file and tag its rows with the tenant it comes from\n",
"supplier_frames = []\n",
"\n",
"for link in liste_suppliers:\n",
"    df_supplier, tenant_id = database_loading(link)\n",
"    df_supplier['tenant_id'] = int(tenant_id)\n",
"    supplier_frames.append(df_supplier)\n",
"\n",
"# A single concat at the end avoids re-copying the accumulated frame at every iteration.\n",
"# With no file at all, df_all is an empty DataFrame, as before.\n",
"df_all = pd.concat(supplier_frames, axis = 0) if supplier_frames else pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "1522d8cd",
"metadata": {},
"outputs": [],
"source": [
"# df_all[df_all['tenant_id'] == 101]['name'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "b0e42a61",
"metadata": {},
"outputs": [],
"source": [
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online']\n",
"# vad = 'vente à distance' (distance selling)\n",
"df_all['name'] = df_all['name'].fillna('')\n",
"\n",
"# Keywords are escaped so that each one is matched literally, even if a keyword containing\n",
"# regex metacharacters is added to the list later.\n",
"motif_internet = '|'.join(re.escape(mot) for mot in liste_mots)\n",
"\n",
"# NOTE(review): matching is by substring, so 'net' also matches inside longer words (which\n",
"# makes 'internet' redundant) - confirm this is the intended rule.\n",
"df_all['canal_vente_internet'] = (\n",
"    df_all['name'].str.contains(motif_internet, case=False, regex=True).astype(int)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "d299ae91",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tenant_id\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 1\n",
"5 1\n",
"6 1\n",
"7 1\n",
"8 1\n",
"9 1\n",
"10 1\n",
"11 1\n",
"12 1\n",
"13 1\n",
"14 1\n",
"101 1\n",
"Name: canal_vente_internet, dtype: int64"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per tenant: 1 if at least one supplier name matched a keyword, 0 otherwise\n",
"df_all.groupby('tenant_id')['canal_vente_internet'].max()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}