BDC-team-1/Spectacle/Exploration_spectacle.ipynb
2024-03-10 16:41:43 +00:00

2177 lines
79 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "37977b4e-42e7-4d8e-8b9a-6843292fd128",
"metadata": {},
"outputs": [],
"source": [
"# Import KPI construction functions\n",
"#exec(open('0_KPI_functions.py').read())\n",
"exec(open('../0_KPI_functions.py').read())\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "cca62d72-f809-41a9-bb06-1be7d6b09307",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Input/Company_10/customerplus_cleaned.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" df_customerplus_cleaned = pd.read_csv(file_in, sep=\",\")\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "bcdba447-90f7-450c-b4a3-6da656e38493",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_491/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n"
]
}
],
"source": [
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Input/Company_10/products_purchased_reduced.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "637aa400-f49a-4d8d-802a-868b241f8a9d",
"metadata": {},
"outputs": [],
"source": [
"\n",
"dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
"for nom_base in dic_base:\n",
" FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e60529b5-986f-4685-91e1-782c2b022e09",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_information"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57138</th>\n",
" <td>827940</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57139</th>\n",
" <td>827941</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57140</th>\n",
" <td>827942</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57141</th>\n",
" <td>827943</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57142</th>\n",
" <td>827944</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>57143 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened time_to_open\n",
"0 29 4 NaN NaT\n",
"1 37 3 NaN NaT\n",
"2 39 4 1.0 0 days 05:16:38\n",
"3 41 4 1.0 0 days 01:12:29\n",
"4 44 4 NaN NaT\n",
"... ... ... ... ...\n",
"57138 827940 1 NaN NaT\n",
"57139 827941 1 NaN NaT\n",
"57140 827942 1 NaN NaT\n",
"57141 827943 1 NaN NaT\n",
"57142 827944 1 NaN NaT\n",
"\n",
"[57143 rows x 4 columns]"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"campaigns_kpi_function(campaigns)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "8c42f4a3-bdbc-44fe-a873-3192b983410d",
"metadata": {},
"outputs": [],
"source": [
"# KPI sur le comportement d'achat\n",
"df_tickets_kpi = tickets_kpi_function(purchases)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "df124880-1e4f-4eaf-b0ef-72bb4f840d45",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"dtype: int64"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_tickets_kpi.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "7e2ab67d-1cf6-41de-804e-23c14e0be7d5",
"metadata": {},
"outputs": [],
"source": [
" # KPI sur le comportement d'achat\n",
" \n",
"df_tickets_kpi = tickets_kpi_function(tickets_information = purchases)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "7be68aa3-16de-4319-93d4-0c28258e3dd8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19482</td>\n",
" <td>88</td>\n",
" <td>29</td>\n",
" <td>872.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2643.092500</td>\n",
" <td>718.149398</td>\n",
" <td>1924.943102</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19484</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>62.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1745.021736</td>\n",
" <td>1743.045035</td>\n",
" <td>1.976701</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19485</td>\n",
" <td>131</td>\n",
" <td>21</td>\n",
" <td>1878.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2649.044745</td>\n",
" <td>85.240845</td>\n",
" <td>2563.803900</td>\n",
" <td>84.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19486</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>96.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1944.077604</td>\n",
" <td>1742.794225</td>\n",
" <td>201.283380</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19487</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>33.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1742.877766</td>\n",
" <td>1742.877766</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26100</th>\n",
" <td>824877</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-12.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5.956111</td>\n",
" <td>5.956111</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26101</th>\n",
" <td>824878</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>12.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5.956921</td>\n",
" <td>5.956921</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26102</th>\n",
" <td>824879</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>-38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5.226238</td>\n",
" <td>5.226238</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26103</th>\n",
" <td>824991</td>\n",
" <td>14</td>\n",
" <td>3</td>\n",
" <td>-100.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3.021539</td>\n",
" <td>3.017222</td>\n",
" <td>0.004317</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26104</th>\n",
" <td>824998</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>25.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.072720</td>\n",
" <td>0.072720</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26105 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 19482 88 29 872.0 2 \n",
"1 19484 3 2 62.0 1 \n",
"2 19485 131 21 1878.0 2 \n",
"3 19486 10 4 96.0 1 \n",
"4 19487 2 1 33.0 1 \n",
"... ... ... ... ... ... \n",
"26100 824877 1 1 -12.0 1 \n",
"26101 824878 1 1 12.0 1 \n",
"26102 824879 2 1 -38.0 1 \n",
"26103 824991 14 3 -100.0 1 \n",
"26104 824998 1 1 25.0 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1 2643.092500 718.149398 \n",
"1 0 1745.021736 1743.045035 \n",
"2 1 2649.044745 85.240845 \n",
"3 0 1944.077604 1742.794225 \n",
"4 0 1742.877766 1742.877766 \n",
"... ... ... ... \n",
"26100 0 5.956111 5.956111 \n",
"26101 0 5.956921 5.956921 \n",
"26102 0 5.226238 5.226238 \n",
"26103 0 3.021539 3.017222 \n",
"26104 0 0.072720 0.072720 \n",
"\n",
" time_between_purchase nb_tickets_internet \n",
"0 1924.943102 8.0 \n",
"1 1.976701 0.0 \n",
"2 2563.803900 84.0 \n",
"3 201.283380 0.0 \n",
"4 0.000000 0.0 \n",
"... ... ... \n",
"26100 0.000000 0.0 \n",
"26101 0.000000 0.0 \n",
"26102 0.000000 0.0 \n",
"26103 0.004317 0.0 \n",
"26104 0.000000 0.0 \n",
"\n",
"[26105 rows x 10 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_tickets_kpi"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "4e8c0d75-117f-4400-8d55-b3ae3f43501b",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98789</th>\n",
" <td>766266</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>181304.0</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98790</th>\n",
" <td>766336</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>178189.0</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98791</th>\n",
" <td>766348</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>178141.0</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98792</th>\n",
" <td>766363</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>176807.0</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98793</th>\n",
" <td>766366</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>176788.0</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>98794 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"98789 766266 139 NaN 181304.0 0 \n",
"98790 766336 139 NaN 178189.0 0 \n",
"98791 766348 139 NaN 178141.0 0 \n",
"98792 766363 139 NaN 176807.0 0 \n",
"98793 766366 139 NaN 176788.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"98789 875 False NaN 2 True ... \n",
"98790 875 False NaN 2 True ... \n",
"98791 875 False NaN 2 True ... \n",
"98792 875 False NaN 2 True ... \n",
"98793 875 False NaN 2 True ... \n",
"\n",
" total_price purchase_count first_buying_date country gender_label \\\n",
"0 0.0 0 NaN NaN other \n",
"1 0.0 0 NaN fr other \n",
"2 NaN 14 NaN fr other \n",
"3 NaN 1 NaN fr female \n",
"4 NaN 1 NaN fr female \n",
"... ... ... ... ... ... \n",
"98789 0.0 0 NaN NaN other \n",
"98790 0.0 0 NaN NaN other \n",
"98791 0.0 0 NaN NaN other \n",
"98792 0.0 0 NaN NaN other \n",
"98793 0.0 0 NaN NaN other \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \n",
"0 0 0 1 NaN 0 \n",
"1 0 0 1 1.0 0 \n",
"2 0 0 1 1.0 0 \n",
"3 1 0 0 1.0 0 \n",
"4 1 0 0 1.0 0 \n",
"... ... ... ... ... ... \n",
"98789 0 0 1 NaN 0 \n",
"98790 0 0 1 NaN 0 \n",
"98791 0 0 1 NaN 0 \n",
"98792 0 0 1 NaN 0 \n",
"98793 0 0 1 NaN 0 \n",
"\n",
"[98794 rows x 28 columns]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" # KPI sur les données socio-démographiques\n",
"df_customerplus_clean = customerplus_kpi_function(df_customerplus_cleaned)\n",
" \n",
"df_customerplus_clean"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "59e3a6f5-97e6-48c6-b3f8-4333a0d94eb5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id 0\n",
"street_id 0\n",
"structure_id 96706\n",
"mcp_contact_id 19094\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 98794\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 73081\n",
"max_price 73081\n",
"ticket_sum 0\n",
"average_price 35539\n",
"average_purchase_delay 73081\n",
"average_price_basket 73081\n",
"average_ticket_basket 73081\n",
"total_price 37542\n",
"purchase_count 0\n",
"first_buying_date 73081\n",
"country 44192\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 44192\n",
"has_tags 0\n",
"dtype: int64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_customerplus_clean.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "484979cc-d4a4-4d9d-9701-71a4f353a372",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_438/1359829443.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n"
]
}
],
"source": [
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Input/Company_10/campaigns_information.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "553ca2e7-ead4-4508-8247-fcc602abd249",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Input/Company_10/target_information.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" targets = pd.read_csv(file_in, sep=\",\")\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "17b89ca1-deea-4139-a6c0-7822cc4e7a90",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"targets"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "27a3c2bf-0541-43b4-b62d-4621692f6c66",
"metadata": {},
"outputs": [],
"source": [
"pd.reset_option('display.max_rows',70000)\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "51e57220-021f-4b0f-a2c9-360d612c9f75",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Newsletter mensuelle\n",
"1 Newsletter mensuelle\n",
"2 Newsletter mensuelle\n",
"3 Newsletter mensuelle\n",
"4 Newsletter mensuelle\n",
" ... \n",
"9995 Newsletter mensuelle\n",
"9996 Newsletter mensuelle\n",
"9997 Newsletter mensuelle\n",
"9998 Newsletter mensuelle\n",
"9999 Newsletter mensuelle\n",
"Name: target_name, Length: 10000, dtype: object"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"targets[\"target_name\"].head(10000)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "db3748e6-795e-459c-86dd-3389455af217",
"metadata": {},
"outputs": [],
"source": [
"companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
" 'sport': ['5', '6', '7', '8', '9'],\n",
" 'musique' : ['10', '11', '12', '13', '14']}"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "d6767ba6-94ef-43f9-8f67-15ecdb41a70b",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Choisissez le type de compagnie : sport ? musique ? musee ? musique\n"
]
}
],
"source": [
"type_of_comp = input('Choisissez le type de compagnie : sport ? musique ? musee ?')\n",
"list_of_comp = companies[type_of_comp] \n"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "050963aa-5cdc-4ff2-a380-16efec89adf0",
"metadata": {},
"outputs": [],
"source": [
"# Dossier d'exportation\n",
"BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "21a32b69-de53-45ce-9e31-22c45c223924",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Generalization/musique'"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"BUCKET_OUT"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "177c4742-5ec6-4326-b984-09e673791801",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Generalization/musique'"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'projet-bdc2324-team1/Generalization/musique'"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "80c6d397-117e-493d-ab0f-7698dbfa8cc4",
"metadata": {},
"outputs": [],
"source": [
"def display_covering_time(df, company, datecover):\n",
" \"\"\"\n",
" This function draws the time coverage of each company\n",
" \"\"\"\n",
" min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
" max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
" datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
" print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
" return datecover\n",
"\n",
"\n",
"def compute_time_intersection(datecover):\n",
" \"\"\"\n",
" This function returns the time coverage for all companies\n",
" \"\"\"\n",
" timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
" intersection = set.intersection(*timestamps_sets)\n",
" intersection_list = list(intersection)\n",
" formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
" return sorted(formated_dates)\n",
"\n",
"\n",
"def df_coverage_modelization(sport, coverage_train = 0.7):\n",
" \"\"\"\n",
" This function returns start_date, end_of_features and final dates\n",
" that help to construct train and test datasets\n",
" \"\"\"\n",
" datecover = {}\n",
" for company in sport:\n",
" df_products_purchased_reduced = display_databases(company, file_name = \"products_purchased_reduced\",\n",
" datetime_col = ['purchase_date'])\n",
" datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
" #print(datecover.keys())\n",
" dt_coverage = compute_time_intersection(datecover)\n",
" start_date = dt_coverage[0]\n",
" end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n",
" final_date = dt_coverage[-1]\n",
" return start_date, end_of_features, final_date\n",
" \n",
"\n",
"def dataset_construction(min_date, end_features_date, max_date, directory_path):\n",
" \n",
" # Import customerplus\n",
" df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
" \n",
" # Filtre de cohérence pour la mise en pratique de notre méthode\n",
" max_date = pd.to_datetime(max_date, utc = True, format = 'ISO8601') \n",
" end_features_date = pd.to_datetime(end_features_date, utc = True, format = 'ISO8601')\n",
" min_date = pd.to_datetime(min_date, utc = True, format = 'ISO8601')\n",
"\n",
" #Filtre de la base df_campaigns_information\n",
" df_campaigns_information = df_campaigns_information[(df_campaigns_information['sent_at'] <= end_features_date) & (df_campaigns_information['sent_at'] >= min_date)]\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
" \n",
" #Filtre de la base df_products_purchased_reduced\n",
" df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]\n",
"\n",
" print(\"Data filtering : SUCCESS\")\n",
" \n",
" # Fusion de l'ensemble et creation des KPI\n",
"\n",
" # KPI sur les campagnes publicitaires\n",
" df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
"\n",
" # KPI sur le comportement d'achat\n",
" df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"\n",
" # KPI sur les données socio-démographiques\n",
" df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
" \n",
" print(\"KPIs construction : SUCCESS\")\n",
" \n",
" # Fusion avec KPI liés au customer\n",
" df_customer = pd.merge(df_customerplus_clean, df_campaigns_kpi, on = 'customer_id', how = 'left')\n",
" \n",
" # Fill NaN values\n",
" df_customer[['nb_campaigns', 'nb_campaigns_opened']] = df_customer[['nb_campaigns', 'nb_campaigns_opened']].fillna(0)\n",
" \n",
" # Fusion avec KPI liés au comportement d'achat\n",
" df_customer_product = pd.merge(df_tickets_kpi, df_customer, on = 'customer_id', how = 'outer')\n",
" \n",
" # Fill NaN values\n",
" df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']] = df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']].fillna(0)\n",
"\n",
" print(\"Explanatory variable construction : SUCCESS\")\n",
"\n",
" # 2. Construction of the explained variable \n",
" df_products_purchased_to_predict = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= max_date) & (df_products_purchased_reduced['purchase_date'] > end_features_date)]\n",
"\n",
" # Indicatrice d'achat\n",
" df_products_purchased_to_predict['y_has_purchased'] = 1\n",
"\n",
" y = df_products_purchased_to_predict[['customer_id', 'y_has_purchased']].drop_duplicates()\n",
"\n",
" print(\"Explained variable construction : SUCCESS\")\n",
" \n",
" # 3. Merge between explained and explanatory variables\n",
" dataset = pd.merge(df_customer_product, y, on = ['customer_id'], how = 'left')\n",
"\n",
" # 0 if there is no purchase\n",
" dataset[['y_has_purchased']].fillna(0) \n",
" \n",
" return dataset"
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "2a746097-0cbf-4bd6-b13b-6ee3e5c36fad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Couverture Company 10 : 2016-03-07 - 2023-09-25\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Couverture Company 11 : 2015-06-26 - 2023-11-08\n",
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Couverture Company 12 : 2016-06-14 - 2023-11-08\n",
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Couverture Company 13 : 2010-07-31 - 2023-11-08\n",
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Couverture Company 14 : 1901-01-01 - 2023-11-08\n",
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n",
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n",
"A typical example is when you are setting values in a column of a DataFrame, like:\n",
"\n",
"df[\"col\"][row_indexer] = value\n",
"\n",
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'NaT' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data filtering : SUCCESS\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"KPIs construction : SUCCESS\n",
"Explanatory variable construction : SUCCESS\n",
"Explained variable construction : SUCCESS\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n",
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n",
"A typical example is when you are setting values in a column of a DataFrame, like:\n",
"\n",
"df[\"col\"][row_indexer] = value\n",
"\n",
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'NaT' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data filtering : SUCCESS\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"KPIs construction : SUCCESS\n",
"Explanatory variable construction : SUCCESS\n",
"Explained variable construction : SUCCESS\n",
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n",
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n",
"A typical example is when you are setting values in a column of a DataFrame, like:\n",
"\n",
"df[\"col\"][row_indexer] = value\n",
"\n",
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data filtering : SUCCESS\n",
"KPIs construction : SUCCESS\n",
"Explanatory variable construction : SUCCESS\n",
"Explained variable construction : SUCCESS\n",
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n",
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n",
"A typical example is when you are setting values in a column of a DataFrame, like:\n",
"\n",
"df[\"col\"][row_indexer] = value\n",
"\n",
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'NaT' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data filtering : SUCCESS\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"KPIs construction : SUCCESS\n",
"Explanatory variable construction : SUCCESS\n",
"Explained variable construction : SUCCESS\n",
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: ChainedAssignmentError: behaviour will change in pandas 3.0!\n",
"You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.\n",
"A typical example is when you are setting values in a column of a DataFrame, like:\n",
"\n",
"df[\"col\"][row_indexer] = value\n",
"\n",
"Use `df.loc[row_indexer, \"col\"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
"/tmp/ipykernel_438/573049956.py:55: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'NaT' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.\n",
" df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data filtering : SUCCESS\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"KPIs construction : SUCCESS\n",
"Explanatory variable construction : SUCCESS\n",
"Explained variable construction : SUCCESS\n"
]
}
],
"source": [
"# Create test dataset and train dataset for sport companies\n",
"\n",
"start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)\n",
"\n",
"for company in list_of_comp:\n",
" dataset_test = dataset_construction(min_date = start_date, end_features_date = end_of_features,\n",
" max_date = final_date, directory_path = company) "
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "01900e04-61e7-4a1b-8c9c-b72e42ba9507",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exportation dataset test : SUCCESS\n"
]
}
],
"source": [
" # Exportation\n",
"FILE_KEY_OUT_S3 = \"dataset_test\" + company + \".csv\"\n",
"FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n",
" \n",
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
" dataset_test.to_csv(file_out, index = False)\n",
" \n",
"print(\"Exportation dataset test : SUCCESS\")"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "b0de2e18-edff-416c-b623-e3e23016029d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Generalization/musique/dataset_test14.csv'"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
"execution_count": 105,
"id": "8f56d6ee-82c9-43e2-813d-33d6aaa458dd",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'dataset_test14' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[105], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdataset_test14\u001b[49m\n",
"\u001b[0;31mNameError\u001b[0m: name 'dataset_test14' is not defined"
]
}
],
"source": [
"dataset_test14"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9232a8df-c51a-4f10-9fc8-ce4f8ad8aab4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}