1729 lines
60 KiB
Plaintext
1729 lines
60 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import warnings\n",
|
||
"from datetime import date, timedelta, datetime\n",
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "37977b4e-42e7-4d8e-8b9a-6843292fd128",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Import KPI construction functions\n",
|
||
"exec(open('0_KPI_functions.py').read())\n",
|
||
"# exec(open('../0_KPI_functions.py').read())\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"id": "cca62d72-f809-41a9-bb06-1be7d6b09307",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
|
||
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
|
||
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
|
||
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "68fb54f3-8eb3-4cd0-966b-000876912fb5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>max_price</th>\n",
|
||
" <th>ticket_sum</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>average_purchase_delay</th>\n",
|
||
" <th>average_price_basket</th>\n",
|
||
" <th>average_ticket_basket</th>\n",
|
||
" <th>total_price</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>821538</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>809126</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11005</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>17663</td>\n",
|
||
" <td>12731</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>38100</td>\n",
|
||
" <td>12395</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98789</th>\n",
|
||
" <td>766266</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>181304.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98790</th>\n",
|
||
" <td>766336</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>178189.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98791</th>\n",
|
||
" <td>766348</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>178141.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98792</th>\n",
|
||
" <td>766363</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>176807.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98793</th>\n",
|
||
" <td>766366</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>176788.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>98794 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
|
||
"0 821538 139 NaN NaN 0 \n",
|
||
"1 809126 1063 NaN NaN 0 \n",
|
||
"2 11005 1063 NaN NaN 0 \n",
|
||
"3 17663 12731 NaN NaN 0 \n",
|
||
"4 38100 12395 NaN NaN 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"98789 766266 139 NaN 181304.0 0 \n",
|
||
"98790 766336 139 NaN 178189.0 0 \n",
|
||
"98791 766348 139 NaN 178141.0 0 \n",
|
||
"98792 766363 139 NaN 176807.0 0 \n",
|
||
"98793 766366 139 NaN 176788.0 0 \n",
|
||
"\n",
|
||
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
|
||
"0 875 False NaN 2 True ... \n",
|
||
"1 875 False NaN 2 True ... \n",
|
||
"2 875 False NaN 2 False ... \n",
|
||
"3 875 False NaN 0 False ... \n",
|
||
"4 875 False NaN 0 True ... \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"98789 875 False NaN 2 True ... \n",
|
||
"98790 875 False NaN 2 True ... \n",
|
||
"98791 875 False NaN 2 True ... \n",
|
||
"98792 875 False NaN 2 True ... \n",
|
||
"98793 875 False NaN 2 True ... \n",
|
||
"\n",
|
||
" max_price ticket_sum average_price average_purchase_delay \\\n",
|
||
"0 NaN 0 NaN NaN \n",
|
||
"1 NaN 0 NaN NaN \n",
|
||
"2 NaN 0 0.0 NaN \n",
|
||
"3 NaN 0 0.0 NaN \n",
|
||
"4 NaN 0 0.0 NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"98789 NaN 0 NaN NaN \n",
|
||
"98790 NaN 0 NaN NaN \n",
|
||
"98791 NaN 0 NaN NaN \n",
|
||
"98792 NaN 0 NaN NaN \n",
|
||
"98793 NaN 0 NaN NaN \n",
|
||
"\n",
|
||
" average_price_basket average_ticket_basket total_price \\\n",
|
||
"0 NaN NaN 0.0 \n",
|
||
"1 NaN NaN 0.0 \n",
|
||
"2 NaN NaN NaN \n",
|
||
"3 NaN NaN NaN \n",
|
||
"4 NaN NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"98789 NaN NaN 0.0 \n",
|
||
"98790 NaN NaN 0.0 \n",
|
||
"98791 NaN NaN 0.0 \n",
|
||
"98792 NaN NaN 0.0 \n",
|
||
"98793 NaN NaN 0.0 \n",
|
||
"\n",
|
||
" purchase_count first_buying_date country \n",
|
||
"0 0 NaN NaN \n",
|
||
"1 0 NaN fr \n",
|
||
"2 14 NaN fr \n",
|
||
"3 1 NaN fr \n",
|
||
"4 1 NaN fr \n",
|
||
"... ... ... ... \n",
|
||
"98789 0 NaN NaN \n",
|
||
"98790 0 NaN NaN \n",
|
||
"98791 0 NaN NaN \n",
|
||
"98792 0 NaN NaN \n",
|
||
"98793 0 NaN NaN \n",
|
||
"\n",
|
||
"[98794 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_cleaned"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"BUCKET = \"projet-bdc2324-team1\"\n",
|
||
"FILE_KEY_S3 = \"0_Input/Company_10/customerplus_cleaned.csv\"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" df_customerplus_cleaned = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "bcdba447-90f7-450c-b4a3-6da656e38493",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_438/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"BUCKET = \"projet-bdc2324-team1\"\n",
|
||
"FILE_KEY_S3 = \"0_Input/Company_10/products_purchased_reduced.csv\"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
|
||
" \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "637aa400-f49a-4d8d-802a-868b241f8a9d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
|
||
"for nom_base in dic_base:\n",
|
||
" FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
|
||
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "e60529b5-986f-4685-91e1-782c2b022e09",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>target_type_is_import</th>\n",
|
||
" <th>target_type_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1165098</td>\n",
|
||
" <td>618562</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1165100</td>\n",
|
||
" <td>618559</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1165101</td>\n",
|
||
" <td>618561</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1165102</td>\n",
|
||
" <td>618560</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1165103</td>\n",
|
||
" <td>618558</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69253</th>\n",
|
||
" <td>1698158</td>\n",
|
||
" <td>18580</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69254</th>\n",
|
||
" <td>1698159</td>\n",
|
||
" <td>18569</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69255</th>\n",
|
||
" <td>1698160</td>\n",
|
||
" <td>2962</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69256</th>\n",
|
||
" <td>1698161</td>\n",
|
||
" <td>3825</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69257</th>\n",
|
||
" <td>1698162</td>\n",
|
||
" <td>5731</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>69258 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_name target_type_is_import \\\n",
|
||
"0 1165098 618562 Newsletter mensuelle False \n",
|
||
"1 1165100 618559 Newsletter mensuelle False \n",
|
||
"2 1165101 618561 Newsletter mensuelle False \n",
|
||
"3 1165102 618560 Newsletter mensuelle False \n",
|
||
"4 1165103 618558 Newsletter mensuelle False \n",
|
||
"... ... ... ... ... \n",
|
||
"69253 1698158 18580 Newsletter mensuelle False \n",
|
||
"69254 1698159 18569 Newsletter mensuelle False \n",
|
||
"69255 1698160 2962 Newsletter mensuelle False \n",
|
||
"69256 1698161 3825 Newsletter mensuelle False \n",
|
||
"69257 1698162 5731 Newsletter mensuelle False \n",
|
||
"\n",
|
||
" target_type_name \n",
|
||
"0 manual_static_filter \n",
|
||
"1 manual_static_filter \n",
|
||
"2 manual_static_filter \n",
|
||
"3 manual_static_filter \n",
|
||
"4 manual_static_filter \n",
|
||
"... ... \n",
|
||
"69253 manual_static_filter \n",
|
||
"69254 manual_static_filter \n",
|
||
"69255 manual_static_filter \n",
|
||
"69256 manual_static_filter \n",
|
||
"69257 manual_static_filter \n",
|
||
"\n",
|
||
"[69258 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"target_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<string>:27: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>29</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>37</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 05:16:38</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>41</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 01:12:29</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>44</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>57138</th>\n",
|
||
" <td>827940</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>57139</th>\n",
|
||
" <td>827941</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>57140</th>\n",
|
||
" <td>827942</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>57141</th>\n",
|
||
" <td>827943</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>57142</th>\n",
|
||
" <td>827944</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>57143 rows × 4 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_campaigns nb_campaigns_opened time_to_open\n",
|
||
"0 29 4 NaN NaT\n",
|
||
"1 37 3 NaN NaT\n",
|
||
"2 39 4 1.0 0 days 05:16:38\n",
|
||
"3 41 4 1.0 0 days 01:12:29\n",
|
||
"4 44 4 NaN NaT\n",
|
||
"... ... ... ... ...\n",
|
||
"57138 827940 1 NaN NaT\n",
|
||
"57139 827941 1 NaN NaT\n",
|
||
"57140 827942 1 NaN NaT\n",
|
||
"57141 827943 1 NaN NaT\n",
|
||
"57142 827944 1 NaN NaT\n",
|
||
"\n",
|
||
"[57143 rows x 4 columns]"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"campaigns_kpi_function(campaigns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"id": "8c42f4a3-bdbc-44fe-a873-3192b983410d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# KPIs describing purchasing behaviour\n",
|
||
"df_tickets_kpi = tickets_kpi_function(purchases)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "df124880-1e4f-4eaf-b0ef-72bb4f840d45",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>19482</td>\n",
|
||
" <td>88</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>872.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2643.092500</td>\n",
|
||
" <td>718.149398</td>\n",
|
||
" <td>1924.943102</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>19484</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>62.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1745.021736</td>\n",
|
||
" <td>1743.045035</td>\n",
|
||
" <td>1.976701</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>19485</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>1878.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2649.044745</td>\n",
|
||
" <td>85.240845</td>\n",
|
||
" <td>2563.803900</td>\n",
|
||
" <td>84.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>19486</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>96.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1944.077604</td>\n",
|
||
" <td>1742.794225</td>\n",
|
||
" <td>201.283380</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>19487</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>33.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1742.877766</td>\n",
|
||
" <td>1742.877766</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26100</th>\n",
|
||
" <td>824877</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>-12.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5.956111</td>\n",
|
||
" <td>5.956111</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26101</th>\n",
|
||
" <td>824878</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5.956921</td>\n",
|
||
" <td>5.956921</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26102</th>\n",
|
||
" <td>824879</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>-38.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5.226238</td>\n",
|
||
" <td>5.226238</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26103</th>\n",
|
||
" <td>824991</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>-100.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3.021539</td>\n",
|
||
" <td>3.017222</td>\n",
|
||
" <td>0.004317</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26104</th>\n",
|
||
" <td>824998</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>25.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.072720</td>\n",
|
||
" <td>0.072720</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>26105 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 19482 88 29 872.0 2 \n",
|
||
"1 19484 3 2 62.0 1 \n",
|
||
"2 19485 131 21 1878.0 2 \n",
|
||
"3 19486 10 4 96.0 1 \n",
|
||
"4 19487 2 1 33.0 1 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"26100 824877 1 1 -12.0 1 \n",
|
||
"26101 824878 1 1 12.0 1 \n",
|
||
"26102 824879 2 1 -38.0 1 \n",
|
||
"26103 824991 14 3 -100.0 1 \n",
|
||
"26104 824998 1 1 25.0 1 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 1 2643.092500 718.149398 \n",
|
||
"1 0 1745.021736 1743.045035 \n",
|
||
"2 1 2649.044745 85.240845 \n",
|
||
"3 0 1944.077604 1742.794225 \n",
|
||
"4 0 1742.877766 1742.877766 \n",
|
||
"... ... ... ... \n",
|
||
"26100 0 5.956111 5.956111 \n",
|
||
"26101 0 5.956921 5.956921 \n",
|
||
"26102 0 5.226238 5.226238 \n",
|
||
"26103 0 3.021539 3.017222 \n",
|
||
"26104 0 0.072720 0.072720 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet \n",
|
||
"0 1924.943102 8.0 \n",
|
||
"1 1.976701 0.0 \n",
|
||
"2 2563.803900 84.0 \n",
|
||
"3 201.283380 0.0 \n",
|
||
"4 0.000000 0.0 \n",
|
||
"... ... ... \n",
|
||
"26100 0.000000 0.0 \n",
|
||
"26101 0.000000 0.0 \n",
|
||
"26102 0.000000 0.0 \n",
|
||
"26103 0.004317 0.0 \n",
|
||
"26104 0.000000 0.0 \n",
|
||
"\n",
|
||
"[26105 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 53,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_tickets_kpi"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"id": "4e8c0d75-117f-4400-8d55-b3ae3f43501b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>total_price</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>has_tags</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>821538</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>809126</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11005</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>17663</td>\n",
|
||
" <td>12731</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>38100</td>\n",
|
||
" <td>12395</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98789</th>\n",
|
||
" <td>766266</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>181304.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98790</th>\n",
|
||
" <td>766336</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>178189.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98791</th>\n",
|
||
" <td>766348</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>178141.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98792</th>\n",
|
||
" <td>766363</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>176807.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98793</th>\n",
|
||
" <td>766366</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>176788.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>98794 rows × 28 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
|
||
"0 821538 139 NaN NaN 0 \n",
|
||
"1 809126 1063 NaN NaN 0 \n",
|
||
"2 11005 1063 NaN NaN 0 \n",
|
||
"3 17663 12731 NaN NaN 0 \n",
|
||
"4 38100 12395 NaN NaN 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"98789 766266 139 NaN 181304.0 0 \n",
|
||
"98790 766336 139 NaN 178189.0 0 \n",
|
||
"98791 766348 139 NaN 178141.0 0 \n",
|
||
"98792 766363 139 NaN 176807.0 0 \n",
|
||
"98793 766366 139 NaN 176788.0 0 \n",
|
||
"\n",
|
||
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
|
||
"0 875 False NaN 2 True ... \n",
|
||
"1 875 False NaN 2 True ... \n",
|
||
"2 875 False NaN 2 False ... \n",
|
||
"3 875 False NaN 0 False ... \n",
|
||
"4 875 False NaN 0 True ... \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"98789 875 False NaN 2 True ... \n",
|
||
"98790 875 False NaN 2 True ... \n",
|
||
"98791 875 False NaN 2 True ... \n",
|
||
"98792 875 False NaN 2 True ... \n",
|
||
"98793 875 False NaN 2 True ... \n",
|
||
"\n",
|
||
" total_price purchase_count first_buying_date country gender_label \\\n",
|
||
"0 0.0 0 NaN NaN other \n",
|
||
"1 0.0 0 NaN fr other \n",
|
||
"2 NaN 14 NaN fr other \n",
|
||
"3 NaN 1 NaN fr female \n",
|
||
"4 NaN 1 NaN fr female \n",
|
||
"... ... ... ... ... ... \n",
|
||
"98789 0.0 0 NaN NaN other \n",
|
||
"98790 0.0 0 NaN NaN other \n",
|
||
"98791 0.0 0 NaN NaN other \n",
|
||
"98792 0.0 0 NaN NaN other \n",
|
||
"98793 0.0 0 NaN NaN other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr has_tags \n",
|
||
"0 0 0 1 NaN 0 \n",
|
||
"1 0 0 1 1.0 0 \n",
|
||
"2 0 0 1 1.0 0 \n",
|
||
"3 1 0 0 1.0 0 \n",
|
||
"4 1 0 0 1.0 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"98789 0 0 1 NaN 0 \n",
|
||
"98790 0 0 1 NaN 0 \n",
|
||
"98791 0 0 1 NaN 0 \n",
|
||
"98792 0 0 1 NaN 0 \n",
|
||
"98793 0 0 1 NaN 0 \n",
|
||
"\n",
|
||
"[98794 rows x 28 columns]"
|
||
]
|
||
},
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# KPIs on socio-demographic data\n",
|
||
"df_customerplus_clean = customerplus_kpi_function(df_customerplus_cleaned)\n",
|
||
" \n",
|
||
"df_customerplus_clean"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "484979cc-d4a4-4d9d-9701-71a4f353a372",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_438/1359829443.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"BUCKET = \"projet-bdc2324-team1\"\n",
|
||
"FILE_KEY_S3 = \"0_Input/Company_10/campaigns_information.csv\"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n",
|
||
" \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "553ca2e7-ead4-4508-8247-fcc602abd249",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"BUCKET = \"projet-bdc2324-team1\"\n",
|
||
"FILE_KEY_S3 = \"0_Input/Company_10/target_information.csv\"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" targets = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "17b89ca1-deea-4139-a6c0-7822cc4e7a90",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>target_type_is_import</th>\n",
|
||
" <th>target_type_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1165098</td>\n",
|
||
" <td>618562</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1165100</td>\n",
|
||
" <td>618559</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1165101</td>\n",
|
||
" <td>618561</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1165102</td>\n",
|
||
" <td>618560</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1165103</td>\n",
|
||
" <td>618558</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69253</th>\n",
|
||
" <td>1698158</td>\n",
|
||
" <td>18580</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69254</th>\n",
|
||
" <td>1698159</td>\n",
|
||
" <td>18569</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69255</th>\n",
|
||
" <td>1698160</td>\n",
|
||
" <td>2962</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69256</th>\n",
|
||
" <td>1698161</td>\n",
|
||
" <td>3825</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69257</th>\n",
|
||
" <td>1698162</td>\n",
|
||
" <td>5731</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>69258 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_name target_type_is_import \\\n",
|
||
"0 1165098 618562 Newsletter mensuelle False \n",
|
||
"1 1165100 618559 Newsletter mensuelle False \n",
|
||
"2 1165101 618561 Newsletter mensuelle False \n",
|
||
"3 1165102 618560 Newsletter mensuelle False \n",
|
||
"4 1165103 618558 Newsletter mensuelle False \n",
|
||
"... ... ... ... ... \n",
|
||
"69253 1698158 18580 Newsletter mensuelle False \n",
|
||
"69254 1698159 18569 Newsletter mensuelle False \n",
|
||
"69255 1698160 2962 Newsletter mensuelle False \n",
|
||
"69256 1698161 3825 Newsletter mensuelle False \n",
|
||
"69257 1698162 5731 Newsletter mensuelle False \n",
|
||
"\n",
|
||
" target_type_name \n",
|
||
"0 manual_static_filter \n",
|
||
"1 manual_static_filter \n",
|
||
"2 manual_static_filter \n",
|
||
"3 manual_static_filter \n",
|
||
"4 manual_static_filter \n",
|
||
"... ... \n",
|
||
"69253 manual_static_filter \n",
|
||
"69254 manual_static_filter \n",
|
||
"69255 manual_static_filter \n",
|
||
"69256 manual_static_filter \n",
|
||
"69257 manual_static_filter \n",
|
||
"\n",
|
||
"[69258 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"targets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c90d94ab-cf0e-4d18-9d5e-cb1d22f4d58b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Output bucket path. The literal inside the braces uses double quotes because,\n",
|
||
"# before Python 3.12, an f-string expression cannot reuse the enclosing quote character.\n",
|
||
"BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{\"musee\"}'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d6767ba6-94ef-43f9-8f67-15ecdb41a70b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|