BDC-team-1/Spectacle/Stat_desc.ipynb
2024-03-02 12:05:51 +00:00

1248 lines
47 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
"source": [
"# Import KPI construction functions\n",
"#exec(open('0_KPI_functions.py').read())\n",
"exec(open('../0_KPI_functions.py').read())\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
"source": [
"dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
"for nom_base in dic_base:\n",
" FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def display_databases(directory_path, file_name, datetime_col = None):\n",
" \"\"\"\n",
" This function returns the file from s3 storage \n",
" \"\"\"\n",
" file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n",
" print(\"File path : \", file_path)\n",
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n",
" return df \n"
]
},
{
"cell_type": "markdown",
"id": "e22eb500-80da-4dd9-8b20-9e4deec64831",
"metadata": {},
"source": [
"nb_compagnie=['10','11','12','13','14']\n",
"for directory_path in nb_compagnie:\n",
" df_customerplus_clean = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
" df_customerplus_clean[\"Number_compagnie\"]=int(directory_path)\n",
" df_campaigns_information[\"Number_compagnie\"]=int(directory_path)\n",
" df_products_purchased_reduced[\"Number_compagnie\"]=int(directory_path)\n",
"\n",
" if nb_compagnie.index(directory_path)>=1:\n",
" customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n",
" campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_information],axis=0)\n",
" products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_products_purchased_reduced],axis=0)\n",
" else:\n",
" customerplus_clean_spectacle=df_customerplus_clean\n",
" campaigns_information_spectacle=df_campaigns_information\n",
" products_purchased_reduced_spectacle=df_products_purchased_reduced\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_8330/2987234667.py:8: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_8330/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_8330/2987234667.py:8: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
}
],
"source": [
"#creation de KPI aggreger par thématique\n",
"\n",
"nb_compagnie=['10','11','12','13','14']\n",
"for directory_path in nb_compagnie:\n",
" df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
" df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
" df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
" df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
" \n",
"#creation de la colonne Number compagnie\n",
" df_tickets_kpi[\"Number_compagnie\"]=int(directory_path)\n",
" df_campaigns_kpi[\"Number_compagnie\"]=int(directory_path)\n",
" df_customerplus_clean[\"Number_compagnie\"]=int(directory_path)\n",
"\n",
" if nb_compagnie.index(directory_path)>=1:\n",
" customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n",
" campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_kpi],axis=0)\n",
" products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_tickets_kpi],axis=0)\n",
" else:\n",
" customerplus_clean_spectacle=df_customerplus_clean\n",
" campaigns_information_spectacle=df_campaigns_kpi\n",
" products_purchased_reduced_spectacle=df_tickets_kpi"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8d3d4a21-3f99-4c48-a102-af33763f7fa7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>Number_compagnie</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"0 0 NaN NaN other \n",
"1 0 NaN fr other \n",
"2 14 NaN fr other \n",
"3 1 NaN fr female \n",
"4 1 NaN fr female \n",
"... ... ... ... ... \n",
"343121 0 NaN NaN other \n",
"343122 0 NaN NaN other \n",
"343123 0 NaN NaN female \n",
"343124 0 NaN NaN other \n",
"343125 0 NaN NaN female \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"0 0 0 1 NaN 0 \n",
"1 0 0 1 1.0 0 \n",
"2 0 0 1 1.0 0 \n",
"3 1 0 0 1.0 0 \n",
"4 1 0 0 1.0 0 \n",
"... ... ... ... ... ... \n",
"343121 0 0 1 NaN 0 \n",
"343122 0 0 1 NaN 0 \n",
"343123 1 0 0 NaN 0 \n",
"343124 0 0 1 NaN 0 \n",
"343125 1 0 0 NaN 0 \n",
"\n",
" Number_compagnie \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"... ... \n",
"343121 14 \n",
"343122 14 \n",
"343123 14 \n",
"343124 14 \n",
"343125 14 \n",
"\n",
"[1523688 rows x 29 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "99ce152d-edb9-403a-ad0a-9629ab0f8246",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>Number_compagnie</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254699</th>\n",
" <td>6837769</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 23:42:15</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254700</th>\n",
" <td>6875038</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254701</th>\n",
" <td>6875066</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254702</th>\n",
" <td>6875099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254703</th>\n",
" <td>6875143</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:17:01</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>688953 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"254699 6837769 1 1.0 0 days 23:42:15 \n",
"254700 6875038 1 0.0 NaT \n",
"254701 6875066 1 0.0 NaT \n",
"254702 6875099 1 0.0 NaT \n",
"254703 6875143 1 1.0 0 days 01:17:01 \n",
"\n",
" Number_compagnie \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"... ... \n",
"254699 14 \n",
"254700 14 \n",
"254701 14 \n",
"254702 14 \n",
"254703 14 \n",
"\n",
"[688953 rows x 5 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"campaigns_information_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "cc6b2dbf-6c99-4517-b3d0-809876045f9f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Number_compagnie</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19482</td>\n",
" <td>88</td>\n",
" <td>29</td>\n",
" <td>872.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2643.092500</td>\n",
" <td>718.149398</td>\n",
" <td>1924.943102</td>\n",
" <td>8.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19484</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>62.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1745.021736</td>\n",
" <td>1743.045035</td>\n",
" <td>1.976701</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19485</td>\n",
" <td>131</td>\n",
" <td>21</td>\n",
" <td>1878.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2649.044745</td>\n",
" <td>85.240845</td>\n",
" <td>2563.803900</td>\n",
" <td>84.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19486</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>96.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1944.077604</td>\n",
" <td>1742.794225</td>\n",
" <td>201.283380</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19487</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>33.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1742.877766</td>\n",
" <td>1742.877766</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99580</th>\n",
" <td>6884747</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.193750</td>\n",
" <td>0.193750</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99581</th>\n",
" <td>6884748</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.186806</td>\n",
" <td>0.186806</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99582</th>\n",
" <td>6884750</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>80.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.136111</td>\n",
" <td>0.136111</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99583</th>\n",
" <td>6884751</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.122917</td>\n",
" <td>0.122917</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99584</th>\n",
" <td>6884753</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.047222</td>\n",
" <td>0.047222</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>764880 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 19482 88 29 872.0 2 \n",
"1 19484 3 2 62.0 1 \n",
"2 19485 131 21 1878.0 2 \n",
"3 19486 10 4 96.0 1 \n",
"4 19487 2 1 33.0 1 \n",
"... ... ... ... ... ... \n",
"99580 6884747 2 1 40.0 1 \n",
"99581 6884748 2 1 40.0 1 \n",
"99582 6884750 4 1 80.0 1 \n",
"99583 6884751 2 1 40.0 1 \n",
"99584 6884753 2 1 40.0 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1 2643.092500 718.149398 \n",
"1 0 1745.021736 1743.045035 \n",
"2 1 2649.044745 85.240845 \n",
"3 0 1944.077604 1742.794225 \n",
"4 0 1742.877766 1742.877766 \n",
"... ... ... ... \n",
"99580 0 0.193750 0.193750 \n",
"99581 0 0.186806 0.186806 \n",
"99582 0 0.136111 0.136111 \n",
"99583 0 0.122917 0.122917 \n",
"99584 0 0.047222 0.047222 \n",
"\n",
" time_between_purchase nb_tickets_internet Number_compagnie \n",
"0 1924.943102 8.0 10 \n",
"1 1.976701 0.0 10 \n",
"2 2563.803900 84.0 10 \n",
"3 201.283380 0.0 10 \n",
"4 0.000000 0.0 10 \n",
"... ... ... ... \n",
"99580 0.000000 0.0 14 \n",
"99581 0.000000 0.0 14 \n",
"99582 0.000000 0.0 14 \n",
"99583 0.000000 0.0 14 \n",
"99584 0.000000 0.0 14 \n",
"\n",
"[764880 rows x 11 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "421c7038-9ee2-4409-a798-ac132c3202ca",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}