1578 lines
58 KiB
Plaintext
1578 lines
58 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "56b3d44e-1e3f-4726-9916-0f9af107860e",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Business Data Challenge - Team 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "15103481-8d74-404c-aa09-7601fe7730da",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c3bb0d13-34b2-4e1c-9985-468cd87c5a0e",
|
||
"metadata": {},
|
||
"source": [
|
||
"Configuration de l'accès aux données"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f99da24f-0d93-4618-92bc-3ba81dc0445c",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Exemple sur bdc2324-data/11"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/2/2campaign_stats.csv',\n",
|
||
" 'bdc2324-data/2/2campaigns.csv',\n",
|
||
" 'bdc2324-data/2/2categories.csv',\n",
|
||
" 'bdc2324-data/2/2contribution_sites.csv',\n",
|
||
" 'bdc2324-data/2/2contributions.csv',\n",
|
||
" 'bdc2324-data/2/2countries.csv',\n",
|
||
" 'bdc2324-data/2/2currencies.csv',\n",
|
||
" 'bdc2324-data/2/2customer_target_mappings.csv',\n",
|
||
" 'bdc2324-data/2/2customersplus.csv',\n",
|
||
" 'bdc2324-data/2/2event_types.csv',\n",
|
||
" 'bdc2324-data/2/2events.csv',\n",
|
||
" 'bdc2324-data/2/2facilities.csv',\n",
|
||
" 'bdc2324-data/2/2link_stats.csv',\n",
|
||
" 'bdc2324-data/2/2pricing_formulas.csv',\n",
|
||
" 'bdc2324-data/2/2product_packs.csv',\n",
|
||
" 'bdc2324-data/2/2products.csv',\n",
|
||
" 'bdc2324-data/2/2products_groups.csv',\n",
|
||
" 'bdc2324-data/2/2purchases.csv',\n",
|
||
" 'bdc2324-data/2/2representation_category_capacities.csv',\n",
|
||
" 'bdc2324-data/2/2representations.csv',\n",
|
||
" 'bdc2324-data/2/2seasons.csv',\n",
|
||
" 'bdc2324-data/2/2structure_tag_mappings.csv',\n",
|
||
" 'bdc2324-data/2/2suppliers.csv',\n",
|
||
" 'bdc2324-data/2/2tags.csv',\n",
|
||
" 'bdc2324-data/2/2target_types.csv',\n",
|
||
" 'bdc2324-data/2/2targets.csv',\n",
|
||
" 'bdc2324-data/2/2tickets.csv']"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"BUCKET = \"bdc2324-data/2\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "779da86b-ac61-4c61-88d2-fa1c0c19efce",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Type de client au global"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "d22aa131-5069-43d4-a42e-24f38cc7240d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index(['id', 'customer_id', 'target_id', 'created_at', 'updated_at', 'name',\n",
|
||
" 'extra_field'],\n",
|
||
" dtype='object')\n",
|
||
"(124302, 7)\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 124302 entries, 0 to 124301\n",
|
||
"Data columns (total 7 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 id 124302 non-null int64 \n",
|
||
" 1 customer_id 124302 non-null int64 \n",
|
||
" 2 target_id 124302 non-null int64 \n",
|
||
" 3 created_at 124296 non-null object \n",
|
||
" 4 updated_at 124296 non-null object \n",
|
||
" 5 name 0 non-null float64\n",
|
||
" 6 extra_field 0 non-null float64\n",
|
||
"dtypes: float64(2), int64(3), object(2)\n",
|
||
"memory usage: 6.6+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Segmentation existante\n",
|
||
"FILE_PATH_S3 = 'bdc2324-data/11/11customer_target_mappings.csv'\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" customer_target_mappings = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"print(customer_target_mappings.columns)\n",
|
||
"print(customer_target_mappings.shape)\n",
|
||
"customer_target_mappings.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "967b20e2-5a30-4724-989f-b9e39c7c67e7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>793889</td>\n",
|
||
" <td>344151</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-09-29 17:55:41.083666+02:00</td>\n",
|
||
" <td>2022-09-29 17:55:41.083666+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>793890</td>\n",
|
||
" <td>344152</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-09-29 19:16:07.252114+02:00</td>\n",
|
||
" <td>2022-09-29 19:16:07.252114+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>793891</td>\n",
|
||
" <td>344153</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-09-29 19:55:10.443450+02:00</td>\n",
|
||
" <td>2022-09-29 19:55:10.443450+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>793892</td>\n",
|
||
" <td>344154</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-09-29 20:16:08.269407+02:00</td>\n",
|
||
" <td>2022-09-29 20:16:08.269407+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>793893</td>\n",
|
||
" <td>344155</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-09-29 21:03:40.541998+02:00</td>\n",
|
||
" <td>2022-09-29 21:03:40.541998+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>124297</th>\n",
|
||
" <td>742001</td>\n",
|
||
" <td>329855</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-07-11 18:17:09.607162+02:00</td>\n",
|
||
" <td>2022-07-11 18:17:09.607162+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>124298</th>\n",
|
||
" <td>742002</td>\n",
|
||
" <td>329856</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-07-11 18:44:45.636248+02:00</td>\n",
|
||
" <td>2022-07-11 18:44:45.636248+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>124299</th>\n",
|
||
" <td>742000</td>\n",
|
||
" <td>329854</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-07-11 17:46:48.914507+02:00</td>\n",
|
||
" <td>2022-07-11 17:46:48.914507+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>124300</th>\n",
|
||
" <td>742003</td>\n",
|
||
" <td>329857</td>\n",
|
||
" <td>134</td>\n",
|
||
" <td>2022-07-11 18:44:55.915889+02:00</td>\n",
|
||
" <td>2022-07-11 18:44:55.915889+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>124301</th>\n",
|
||
" <td>741996</td>\n",
|
||
" <td>329850</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>2022-07-11 16:52:37.227487+02:00</td>\n",
|
||
" <td>2022-07-11 16:52:37.227487+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>124302 rows × 7 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_id created_at \\\n",
|
||
"0 793889 344151 101 2022-09-29 17:55:41.083666+02:00 \n",
|
||
"1 793890 344152 101 2022-09-29 19:16:07.252114+02:00 \n",
|
||
"2 793891 344153 101 2022-09-29 19:55:10.443450+02:00 \n",
|
||
"3 793892 344154 101 2022-09-29 20:16:08.269407+02:00 \n",
|
||
"4 793893 344155 101 2022-09-29 21:03:40.541998+02:00 \n",
|
||
"... ... ... ... ... \n",
|
||
"124297 742001 329855 101 2022-07-11 18:17:09.607162+02:00 \n",
|
||
"124298 742002 329856 101 2022-07-11 18:44:45.636248+02:00 \n",
|
||
"124299 742000 329854 101 2022-07-11 17:46:48.914507+02:00 \n",
|
||
"124300 742003 329857 134 2022-07-11 18:44:55.915889+02:00 \n",
|
||
"124301 741996 329850 101 2022-07-11 16:52:37.227487+02:00 \n",
|
||
"\n",
|
||
" updated_at name extra_field \n",
|
||
"0 2022-09-29 17:55:41.083666+02:00 NaN NaN \n",
|
||
"1 2022-09-29 19:16:07.252114+02:00 NaN NaN \n",
|
||
"2 2022-09-29 19:55:10.443450+02:00 NaN NaN \n",
|
||
"3 2022-09-29 20:16:08.269407+02:00 NaN NaN \n",
|
||
"4 2022-09-29 21:03:40.541998+02:00 NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"124297 2022-07-11 18:17:09.607162+02:00 NaN NaN \n",
|
||
"124298 2022-07-11 18:44:45.636248+02:00 NaN NaN \n",
|
||
"124299 2022-07-11 17:46:48.914507+02:00 NaN NaN \n",
|
||
"124300 2022-07-11 18:44:55.915889+02:00 NaN NaN \n",
|
||
"124301 2022-07-11 16:52:37.227487+02:00 NaN NaN \n",
|
||
"\n",
|
||
"[124302 rows x 7 columns]"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer_target_mappings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "c4b6bdcc-9f13-449b-9a8b-c5ca794637be",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([nan])"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer_target_mappings['extra_field'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "47bc8453-0693-4838-8bd8-4d800a82c496",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([nan])"
|
||
]
|
||
},
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer_target_mappings['name'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "ab3f937b-ef62-499a-8ee2-d47d1d988ace",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index(['id', 'is_import', 'name', 'created_at', 'updated_at', 'identifier'], dtype='object')\n",
|
||
"(4, 6)\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 4 entries, 0 to 3\n",
|
||
"Data columns (total 6 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 id 4 non-null int64 \n",
|
||
" 1 is_import 4 non-null bool \n",
|
||
" 2 name 4 non-null object\n",
|
||
" 3 created_at 4 non-null object\n",
|
||
" 4 updated_at 4 non-null object\n",
|
||
" 5 identifier 4 non-null object\n",
|
||
"dtypes: bool(1), int64(1), object(4)\n",
|
||
"memory usage: 292.0+ bytes\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Existing segmentation: target types\n",
|
||
"FILE_PATH_S3 = 'bdc2324-data/11/11target_types.csv'\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" target_types = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"print(target_types.columns)\n",
|
||
"print(target_types.shape)\n",
|
||
"target_types.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "b70488b9-38fc-40a8-9e2f-3330b3f9eef5",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>is_import</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" <td>2021-04-29 13:42:14.111085+02:00</td>\n",
|
||
" <td>2021-04-29 13:42:14.111085+02:00</td>\n",
|
||
" <td>fb27e81baa4debc6a4e1a8639c20e808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>manual_structure</td>\n",
|
||
" <td>2021-05-07 15:20:00.626650+02:00</td>\n",
|
||
" <td>2021-05-07 15:20:00.626650+02:00</td>\n",
|
||
" <td>382bca214204a2d3462f5ec2728d5d1e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_dynamic_filter</td>\n",
|
||
" <td>2021-09-09 14:27:47.641302+02:00</td>\n",
|
||
" <td>2021-09-09 14:27:47.641302+02:00</td>\n",
|
||
" <td>e0f4b8693184850fefd6d2a38f10584e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>manual_import</td>\n",
|
||
" <td>2021-04-29 13:49:30.107110+02:00</td>\n",
|
||
" <td>2021-04-29 13:49:30.107110+02:00</td>\n",
|
||
" <td>12213df2ce68a624e4c0070521437bac</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id is_import name created_at \\\n",
|
||
"0 1 False manual_static_filter 2021-04-29 13:42:14.111085+02:00 \n",
|
||
"1 3 True manual_structure 2021-05-07 15:20:00.626650+02:00 \n",
|
||
"2 6 False manual_dynamic_filter 2021-09-09 14:27:47.641302+02:00 \n",
|
||
"3 2 True manual_import 2021-04-29 13:49:30.107110+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-04-29 13:42:14.111085+02:00 fb27e81baa4debc6a4e1a8639c20e808 \n",
|
||
"1 2021-05-07 15:20:00.626650+02:00 382bca214204a2d3462f5ec2728d5d1e \n",
|
||
"2 2021-09-09 14:27:47.641302+02:00 e0f4b8693184850fefd6d2a38f10584e \n",
|
||
"3 2021-04-29 13:49:30.107110+02:00 12213df2ce68a624e4c0070521437bac "
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"target_types"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "8dd74e87-97c2-493d-b19f-971b684078d3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index(['id', 'name', 'created_at', 'updated_at', 'identifier'], dtype='object')\n",
|
||
"(20, 5)\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 20 entries, 0 to 19\n",
|
||
"Data columns (total 5 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 id 20 non-null int64 \n",
|
||
" 1 name 19 non-null object\n",
|
||
" 2 created_at 20 non-null object\n",
|
||
" 3 updated_at 20 non-null object\n",
|
||
" 4 identifier 20 non-null object\n",
|
||
"dtypes: int64(1), object(4)\n",
|
||
"memory usage: 928.0+ bytes\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Tags: labels attached to structures (see structure_tag_mappings); presumably customer categories - to confirm\n",
|
||
"FILE_PATH_S3 = 'bdc2324-data/11/11tags.csv'\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" tags = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"print(tags.columns)\n",
|
||
"print(tags.shape)\n",
|
||
"tags.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "91d54732-666c-4250-ba91-5c9b83d4712a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>ens-écoles</td>\n",
|
||
" <td>2021-05-07 15:24:19.808501+02:00</td>\n",
|
||
" <td>2021-05-07 15:24:19.808501+02:00</td>\n",
|
||
" <td>b6a360c5f84595940c5774f13fd39cc3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-05-07 15:24:19.805589+02:00</td>\n",
|
||
" <td>2021-05-07 15:24:19.805589+02:00</td>\n",
|
||
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>ecoles primaires rennes</td>\n",
|
||
" <td>2021-05-07 15:29:06.388415+02:00</td>\n",
|
||
" <td>2021-05-07 15:29:06.388415+02:00</td>\n",
|
||
" <td>ca8649dd64c240d118f60b07d11a7053</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Angers Nantes Opéra</td>\n",
|
||
" <td>2023-01-27 15:59:58.187557+01:00</td>\n",
|
||
" <td>2023-01-27 15:59:58.187557+01:00</td>\n",
|
||
" <td>f8f500f937fe312542399299cdc13f7e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>Opéras</td>\n",
|
||
" <td>2023-01-27 16:03:59.654938+01:00</td>\n",
|
||
" <td>2023-01-27 16:03:59.654938+01:00</td>\n",
|
||
" <td>22eb2c616983ec7b54a093f84b230505</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>Ministère de la Culture</td>\n",
|
||
" <td>2023-01-30 11:22:29.636813+01:00</td>\n",
|
||
" <td>2023-01-30 11:22:29.636813+01:00</td>\n",
|
||
" <td>1b8c5c08fde000d90905a3d14af7763d</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>Orchestres</td>\n",
|
||
" <td>2023-01-30 11:33:56.392799+01:00</td>\n",
|
||
" <td>2023-01-30 11:33:56.392799+01:00</td>\n",
|
||
" <td>7c2aee0c80642d7e325a450f2dec45e5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>Cooperative</td>\n",
|
||
" <td>2023-01-31 14:44:38.471146+01:00</td>\n",
|
||
" <td>2023-01-31 14:44:38.471146+01:00</td>\n",
|
||
" <td>6c88c36ffaab88d255865aa3111d7686</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>Théâtres</td>\n",
|
||
" <td>2023-01-31 14:45:17.804428+01:00</td>\n",
|
||
" <td>2023-01-31 14:45:17.804428+01:00</td>\n",
|
||
" <td>b2c19672df82021702b79482c8cda85a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>La co[opera]tive</td>\n",
|
||
" <td>2023-02-16 17:11:35.004478+01:00</td>\n",
|
||
" <td>2023-02-16 17:11:35.004478+01:00</td>\n",
|
||
" <td>5dbaa3a1f278c0fcf981d447ad20957a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>Ville de Rennes</td>\n",
|
||
" <td>2023-02-16 17:37:13.816196+01:00</td>\n",
|
||
" <td>2023-02-16 17:37:13.816196+01:00</td>\n",
|
||
" <td>bc483d04d9c3a08f167a3ce64366ca72</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>Ensembles en résidence</td>\n",
|
||
" <td>2023-02-16 17:55:54.877374+01:00</td>\n",
|
||
" <td>2023-02-16 17:55:54.877374+01:00</td>\n",
|
||
" <td>e70635e771de13268dccf02bb2abfaf9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>Ministère</td>\n",
|
||
" <td>2023-02-17 11:17:54.429462+01:00</td>\n",
|
||
" <td>2023-02-17 11:17:54.429462+01:00</td>\n",
|
||
" <td>a3f0582853fd19f5b57e3651f8a20e7a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>15</td>\n",
|
||
" <td>Rennes métropole</td>\n",
|
||
" <td>2023-02-17 11:53:24.490786+01:00</td>\n",
|
||
" <td>2023-02-17 11:53:24.490786+01:00</td>\n",
|
||
" <td>e98b8db5941b96c29c353b6f2f502055</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>16</td>\n",
|
||
" <td>Ville de Rennes - équipements culturels</td>\n",
|
||
" <td>2023-02-17 12:00:10.649104+01:00</td>\n",
|
||
" <td>2023-02-17 12:00:10.649104+01:00</td>\n",
|
||
" <td>a44edffc7edb852982efa7f4aa6d0e25</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>17</td>\n",
|
||
" <td>Structures culturelles rennaises</td>\n",
|
||
" <td>2023-02-17 12:05:55.583016+01:00</td>\n",
|
||
" <td>2023-02-17 12:05:55.583016+01:00</td>\n",
|
||
" <td>241550517e4e3b1c926e9aeab0f621cd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Université Rennes 2</td>\n",
|
||
" <td>2023-02-17 14:23:44.832959+01:00</td>\n",
|
||
" <td>2023-02-17 14:23:44.832959+01:00</td>\n",
|
||
" <td>4057c5cee51c4e10aa819f0cf48adc3f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>19</td>\n",
|
||
" <td>Centres chorégraphiques nationaux</td>\n",
|
||
" <td>2023-02-17 15:29:41.827321+01:00</td>\n",
|
||
" <td>2023-02-17 15:29:41.827321+01:00</td>\n",
|
||
" <td>41e75941dfb766365498d917abe0102f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>20</td>\n",
|
||
" <td>Télévision</td>\n",
|
||
" <td>2023-02-17 15:46:13.746092+01:00</td>\n",
|
||
" <td>2023-02-17 15:46:13.746092+01:00</td>\n",
|
||
" <td>36d6409c539dd79c1f3af8c5948603eb</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>21</td>\n",
|
||
" <td>structures culturelles nationales</td>\n",
|
||
" <td>2023-02-17 15:56:00.555722+01:00</td>\n",
|
||
" <td>2023-02-17 15:56:00.555722+01:00</td>\n",
|
||
" <td>5311cf7e42aac53289e1c4a338d5cfa4</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name \\\n",
|
||
"0 2 ens-écoles \n",
|
||
"1 1 NaN \n",
|
||
"2 4 ecoles primaires rennes \n",
|
||
"3 5 Angers Nantes Opéra \n",
|
||
"4 6 Opéras \n",
|
||
"5 7 Ministère de la Culture \n",
|
||
"6 8 Orchestres \n",
|
||
"7 9 Cooperative \n",
|
||
"8 10 Théâtres \n",
|
||
"9 11 La co[opera]tive \n",
|
||
"10 12 Ville de Rennes \n",
|
||
"11 13 Ensembles en résidence \n",
|
||
"12 14 Ministère \n",
|
||
"13 15 Rennes métropole \n",
|
||
"14 16 Ville de Rennes - équipements culturels \n",
|
||
"15 17 Structures culturelles rennaises \n",
|
||
"16 18 Université Rennes 2 \n",
|
||
"17 19 Centres chorégraphiques nationaux \n",
|
||
"18 20 Télévision \n",
|
||
"19 21 structures culturelles nationales \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2021-05-07 15:24:19.808501+02:00 2021-05-07 15:24:19.808501+02:00 \n",
|
||
"1 2021-05-07 15:24:19.805589+02:00 2021-05-07 15:24:19.805589+02:00 \n",
|
||
"2 2021-05-07 15:29:06.388415+02:00 2021-05-07 15:29:06.388415+02:00 \n",
|
||
"3 2023-01-27 15:59:58.187557+01:00 2023-01-27 15:59:58.187557+01:00 \n",
|
||
"4 2023-01-27 16:03:59.654938+01:00 2023-01-27 16:03:59.654938+01:00 \n",
|
||
"5 2023-01-30 11:22:29.636813+01:00 2023-01-30 11:22:29.636813+01:00 \n",
|
||
"6 2023-01-30 11:33:56.392799+01:00 2023-01-30 11:33:56.392799+01:00 \n",
|
||
"7 2023-01-31 14:44:38.471146+01:00 2023-01-31 14:44:38.471146+01:00 \n",
|
||
"8 2023-01-31 14:45:17.804428+01:00 2023-01-31 14:45:17.804428+01:00 \n",
|
||
"9 2023-02-16 17:11:35.004478+01:00 2023-02-16 17:11:35.004478+01:00 \n",
|
||
"10 2023-02-16 17:37:13.816196+01:00 2023-02-16 17:37:13.816196+01:00 \n",
|
||
"11 2023-02-16 17:55:54.877374+01:00 2023-02-16 17:55:54.877374+01:00 \n",
|
||
"12 2023-02-17 11:17:54.429462+01:00 2023-02-17 11:17:54.429462+01:00 \n",
|
||
"13 2023-02-17 11:53:24.490786+01:00 2023-02-17 11:53:24.490786+01:00 \n",
|
||
"14 2023-02-17 12:00:10.649104+01:00 2023-02-17 12:00:10.649104+01:00 \n",
|
||
"15 2023-02-17 12:05:55.583016+01:00 2023-02-17 12:05:55.583016+01:00 \n",
|
||
"16 2023-02-17 14:23:44.832959+01:00 2023-02-17 14:23:44.832959+01:00 \n",
|
||
"17 2023-02-17 15:29:41.827321+01:00 2023-02-17 15:29:41.827321+01:00 \n",
|
||
"18 2023-02-17 15:46:13.746092+01:00 2023-02-17 15:46:13.746092+01:00 \n",
|
||
"19 2023-02-17 15:56:00.555722+01:00 2023-02-17 15:56:00.555722+01:00 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 b6a360c5f84595940c5774f13fd39cc3 \n",
|
||
"1 d41d8cd98f00b204e9800998ecf8427e \n",
|
||
"2 ca8649dd64c240d118f60b07d11a7053 \n",
|
||
"3 f8f500f937fe312542399299cdc13f7e \n",
|
||
"4 22eb2c616983ec7b54a093f84b230505 \n",
|
||
"5 1b8c5c08fde000d90905a3d14af7763d \n",
|
||
"6 7c2aee0c80642d7e325a450f2dec45e5 \n",
|
||
"7 6c88c36ffaab88d255865aa3111d7686 \n",
|
||
"8 b2c19672df82021702b79482c8cda85a \n",
|
||
"9 5dbaa3a1f278c0fcf981d447ad20957a \n",
|
||
"10 bc483d04d9c3a08f167a3ce64366ca72 \n",
|
||
"11 e70635e771de13268dccf02bb2abfaf9 \n",
|
||
"12 a3f0582853fd19f5b57e3651f8a20e7a \n",
|
||
"13 e98b8db5941b96c29c353b6f2f502055 \n",
|
||
"14 a44edffc7edb852982efa7f4aa6d0e25 \n",
|
||
"15 241550517e4e3b1c926e9aeab0f621cd \n",
|
||
"16 4057c5cee51c4e10aa819f0cf48adc3f \n",
|
||
"17 41e75941dfb766365498d917abe0102f \n",
|
||
"18 36d6409c539dd79c1f3af8c5948603eb \n",
|
||
"19 5311cf7e42aac53289e1c4a338d5cfa4 "
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tags"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "4cc9f444-b7e6-4ee5-8ce8-64c63ab7825a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index(['id', 'structure_id', 'tag_id', 'created_at', 'updated_at'], dtype='object')\n",
|
||
"(179, 5)\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 179 entries, 0 to 178\n",
|
||
"Data columns (total 5 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 id 179 non-null int64 \n",
|
||
" 1 structure_id 179 non-null int64 \n",
|
||
" 2 tag_id 179 non-null int64 \n",
|
||
" 3 created_at 179 non-null object\n",
|
||
" 4 updated_at 179 non-null object\n",
|
||
"dtypes: int64(3), object(2)\n",
|
||
"memory usage: 7.1+ KB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Structure-to-tag mapping table; structures presumably represent customers - to confirm\n",
|
||
"FILE_PATH_S3 = 'bdc2324-data/11/11structure_tag_mappings.csv'\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" structure_tag_mappings = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"print(structure_tag_mappings.columns)\n",
|
||
"print(structure_tag_mappings.shape)\n",
|
||
"structure_tag_mappings.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "dcf776df-5c8e-4972-b2c1-b41291ba7e66",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>tag_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>123</td>\n",
|
||
" <td>187</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2023-01-27 16:03:59.680222+01:00</td>\n",
|
||
" <td>2023-01-27 16:03:59.680222+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2021-05-07 15:24:19.872895+02:00</td>\n",
|
||
" <td>2021-05-07 15:24:19.872895+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2021-05-07 15:24:19.873830+02:00</td>\n",
|
||
" <td>2021-05-07 15:24:19.873830+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2021-05-07 15:24:19.874628+02:00</td>\n",
|
||
" <td>2021-05-07 15:24:19.874628+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2021-05-07 15:24:19.875421+02:00</td>\n",
|
||
" <td>2021-05-07 15:24:19.875421+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>174</th>\n",
|
||
" <td>184</td>\n",
|
||
" <td>236</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2023-02-17 16:35:25.041114+01:00</td>\n",
|
||
" <td>2023-02-17 16:35:25.041114+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>175</th>\n",
|
||
" <td>185</td>\n",
|
||
" <td>237</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>2023-02-17 16:39:10.799478+01:00</td>\n",
|
||
" <td>2023-02-17 16:39:10.799478+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>176</th>\n",
|
||
" <td>186</td>\n",
|
||
" <td>238</td>\n",
|
||
" <td>19</td>\n",
|
||
" <td>2023-02-17 16:53:21.098690+01:00</td>\n",
|
||
" <td>2023-02-17 16:53:21.098690+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>177</th>\n",
|
||
" <td>187</td>\n",
|
||
" <td>239</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2023-02-17 16:57:42.623481+01:00</td>\n",
|
||
" <td>2023-02-17 16:57:42.623481+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>178</th>\n",
|
||
" <td>188</td>\n",
|
||
" <td>240</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2023-02-17 16:59:22.067723+01:00</td>\n",
|
||
" <td>2023-02-17 16:59:22.067723+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>179 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id structure_id tag_id created_at \\\n",
|
||
"0 123 187 6 2023-01-27 16:03:59.680222+01:00 \n",
|
||
"1 2 2 2 2021-05-07 15:24:19.872895+02:00 \n",
|
||
"2 3 3 2 2021-05-07 15:24:19.873830+02:00 \n",
|
||
"3 4 4 2 2021-05-07 15:24:19.874628+02:00 \n",
|
||
"4 5 5 2 2021-05-07 15:24:19.875421+02:00 \n",
|
||
".. ... ... ... ... \n",
|
||
"174 184 236 10 2023-02-17 16:35:25.041114+01:00 \n",
|
||
"175 185 237 17 2023-02-17 16:39:10.799478+01:00 \n",
|
||
"176 186 238 19 2023-02-17 16:53:21.098690+01:00 \n",
|
||
"177 187 239 10 2023-02-17 16:57:42.623481+01:00 \n",
|
||
"178 188 240 10 2023-02-17 16:59:22.067723+01:00 \n",
|
||
"\n",
|
||
" updated_at \n",
|
||
"0 2023-01-27 16:03:59.680222+01:00 \n",
|
||
"1 2021-05-07 15:24:19.872895+02:00 \n",
|
||
"2 2021-05-07 15:24:19.873830+02:00 \n",
|
||
"3 2021-05-07 15:24:19.874628+02:00 \n",
|
||
"4 2021-05-07 15:24:19.875421+02:00 \n",
|
||
".. ... \n",
|
||
"174 2023-02-17 16:35:25.041114+01:00 \n",
|
||
"175 2023-02-17 16:39:10.799478+01:00 \n",
|
||
"176 2023-02-17 16:53:21.098690+01:00 \n",
|
||
"177 2023-02-17 16:57:42.623481+01:00 \n",
|
||
"178 2023-02-17 16:59:22.067723+01:00 \n",
|
||
"\n",
|
||
"[179 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"structure_tag_mappings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "41bf1529-5a7c-409e-9791-2024c08c11f0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
|
||
" 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
|
||
" 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
|
||
" 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
|
||
" 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
|
||
" 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
|
||
" 'average_purchase_delay', 'average_price_basket',\n",
|
||
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
|
||
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
|
||
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
|
||
" 'tenant_id'],\n",
|
||
" dtype='object')\n",
|
||
"(71307, 43)\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 71307 entries, 0 to 71306\n",
|
||
"Data columns (total 43 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 id 71307 non-null int64 \n",
|
||
" 1 lastname 41045 non-null object \n",
|
||
" 2 firstname 39140 non-null object \n",
|
||
" 3 birthdate 18174 non-null object \n",
|
||
" 4 email 58203 non-null object \n",
|
||
" 5 street_id 71307 non-null int64 \n",
|
||
" 6 created_at 71307 non-null object \n",
|
||
" 7 updated_at 71307 non-null object \n",
|
||
" 8 civility 0 non-null float64\n",
|
||
" 9 is_partner 71307 non-null bool \n",
|
||
" 10 extra 0 non-null float64\n",
|
||
" 11 deleted_at 0 non-null float64\n",
|
||
" 12 reference 0 non-null float64\n",
|
||
" 13 gender 71307 non-null int64 \n",
|
||
" 14 is_email_true 71307 non-null bool \n",
|
||
" 15 extra_field 0 non-null float64\n",
|
||
" 16 identifier 71307 non-null object \n",
|
||
" 17 opt_in 71307 non-null bool \n",
|
||
" 18 structure_id 616 non-null float64\n",
|
||
" 19 note 451 non-null object \n",
|
||
" 20 profession 812 non-null object \n",
|
||
" 21 language 0 non-null float64\n",
|
||
" 22 mcp_contact_id 22417 non-null float64\n",
|
||
" 23 need_reload 71307 non-null bool \n",
|
||
" 24 last_buying_date 34040 non-null object \n",
|
||
" 25 max_price 34040 non-null float64\n",
|
||
" 26 ticket_sum 71307 non-null int64 \n",
|
||
" 27 average_price 68694 non-null float64\n",
|
||
" 28 fidelity 71307 non-null int64 \n",
|
||
" 29 average_purchase_delay 34040 non-null float64\n",
|
||
" 30 average_price_basket 34040 non-null float64\n",
|
||
" 31 average_ticket_basket 34040 non-null float64\n",
|
||
" 32 total_price 36653 non-null float64\n",
|
||
" 33 preferred_category 0 non-null float64\n",
|
||
" 34 preferred_supplier 0 non-null float64\n",
|
||
" 35 preferred_formula 0 non-null float64\n",
|
||
" 36 purchase_count 71307 non-null int64 \n",
|
||
" 37 first_buying_date 34040 non-null object \n",
|
||
" 38 last_visiting_date 0 non-null float64\n",
|
||
" 39 zipcode 33756 non-null object \n",
|
||
" 40 country 39910 non-null object \n",
|
||
" 41 age 18174 non-null float64\n",
|
||
" 42 tenant_id 71307 non-null int64 \n",
|
||
"dtypes: bool(4), float64(19), int64(7), object(13)\n",
|
||
"memory usage: 21.5+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Customers (customersplus table)\n",
|
||
"FILE_PATH_S3 = 'bdc2324-data/11/11customersplus.csv'\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" customersplus = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"print(customersplus.columns)\n",
|
||
"print(customersplus.shape)\n",
|
||
"customersplus.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"id": "948a0b2b-8d1c-4afb-802e-670d67dd8c20",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>lastname</th>\n",
|
||
" <th>firstname</th>\n",
|
||
" <th>birthdate</th>\n",
|
||
" <th>email</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>civility</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>preferred_category</th>\n",
|
||
" <th>preferred_supplier</th>\n",
|
||
" <th>preferred_formula</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>last_visiting_date</th>\n",
|
||
" <th>zipcode</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>286834</td>\n",
|
||
" <td>lastname286834</td>\n",
|
||
" <td>firstname286834</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email286834</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2022-05-19 10:09:09.361137+02:00</td>\n",
|
||
" <td>2022-05-19 10:09:09.361137+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>330695</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email330695</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-07-16 04:10:34.135134+02:00</td>\n",
|
||
" <td>2022-07-16 04:10:34.156704+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>330978</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email330978</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-07-21 22:14:09.811721+02:00</td>\n",
|
||
" <td>2022-07-21 22:14:09.836051+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>338697</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email338697</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-09-15 19:02:03.950536+02:00</td>\n",
|
||
" <td>2022-09-15 19:02:03.985642+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>338726</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email338726</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-09-16 01:24:40.719882+02:00</td>\n",
|
||
" <td>2022-09-16 01:24:40.742753+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71302</th>\n",
|
||
" <td>27105</td>\n",
|
||
" <td>lastname27105</td>\n",
|
||
" <td>firstname27105</td>\n",
|
||
" <td>1957-01-26</td>\n",
|
||
" <td>email27105</td>\n",
|
||
" <td>205024</td>\n",
|
||
" <td>2021-04-22 15:12:59.986534+02:00</td>\n",
|
||
" <td>2023-09-12 18:59:31.613235+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2018-12-31 18:56:57+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>35700</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>66.0</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71303</th>\n",
|
||
" <td>27108</td>\n",
|
||
" <td>lastname27108</td>\n",
|
||
" <td>firstname27108</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>205024</td>\n",
|
||
" <td>2021-04-22 15:12:59.989197+02:00</td>\n",
|
||
" <td>2023-09-12 18:27:34.380843+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2015-12-29 14:51:46+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>35700</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71304</th>\n",
|
||
" <td>27110</td>\n",
|
||
" <td>lastname27110</td>\n",
|
||
" <td>firstname27110</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2021-04-22 15:12:59.991029+02:00</td>\n",
|
||
" <td>2022-04-14 11:41:33.738500+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2018-12-31 19:12:59+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71305</th>\n",
|
||
" <td>10607</td>\n",
|
||
" <td>lastname10607</td>\n",
|
||
" <td>firstname10607</td>\n",
|
||
" <td>1963-01-04</td>\n",
|
||
" <td>email10607</td>\n",
|
||
" <td>313332</td>\n",
|
||
" <td>2021-04-22 14:56:45.742226+02:00</td>\n",
|
||
" <td>2023-09-12 17:55:17.723195+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>26</td>\n",
|
||
" <td>2015-10-10 14:11:21+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>35850</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71306</th>\n",
|
||
" <td>19095</td>\n",
|
||
" <td>lastname19095</td>\n",
|
||
" <td>firstname19095</td>\n",
|
||
" <td>1979-07-16</td>\n",
|
||
" <td>email19095</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2021-04-22 15:06:30.120537+02:00</td>\n",
|
||
" <td>2023-09-12 18:27:36.904104+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2019-05-19 21:18:36+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>44.0</td>\n",
|
||
" <td>1556</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>71307 rows × 43 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id lastname firstname birthdate email \\\n",
|
||
"0 286834 lastname286834 firstname286834 NaN email286834 \n",
|
||
"1 330695 NaN NaN NaN email330695 \n",
|
||
"2 330978 NaN NaN NaN email330978 \n",
|
||
"3 338697 NaN NaN NaN email338697 \n",
|
||
"4 338726 NaN NaN NaN email338726 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"71302 27105 lastname27105 firstname27105 1957-01-26 email27105 \n",
|
||
"71303 27108 lastname27108 firstname27108 NaN NaN \n",
|
||
"71304 27110 lastname27110 firstname27110 NaN NaN \n",
|
||
"71305 10607 lastname10607 firstname10607 1963-01-04 email10607 \n",
|
||
"71306 19095 lastname19095 firstname19095 1979-07-16 email19095 \n",
|
||
"\n",
|
||
" street_id created_at \\\n",
|
||
"0 6 2022-05-19 10:09:09.361137+02:00 \n",
|
||
"1 1 2022-07-16 04:10:34.135134+02:00 \n",
|
||
"2 1 2022-07-21 22:14:09.811721+02:00 \n",
|
||
"3 1 2022-09-15 19:02:03.950536+02:00 \n",
|
||
"4 1 2022-09-16 01:24:40.719882+02:00 \n",
|
||
"... ... ... \n",
|
||
"71302 205024 2021-04-22 15:12:59.986534+02:00 \n",
|
||
"71303 205024 2021-04-22 15:12:59.989197+02:00 \n",
|
||
"71304 6 2021-04-22 15:12:59.991029+02:00 \n",
|
||
"71305 313332 2021-04-22 14:56:45.742226+02:00 \n",
|
||
"71306 6 2021-04-22 15:06:30.120537+02:00 \n",
|
||
"\n",
|
||
" updated_at civility is_partner ... \\\n",
|
||
"0 2022-05-19 10:09:09.361137+02:00 NaN False ... \n",
|
||
"1 2022-07-16 04:10:34.156704+02:00 NaN False ... \n",
|
||
"2 2022-07-21 22:14:09.836051+02:00 NaN False ... \n",
|
||
"3 2022-09-15 19:02:03.985642+02:00 NaN False ... \n",
|
||
"4 2022-09-16 01:24:40.742753+02:00 NaN False ... \n",
|
||
"... ... ... ... ... \n",
|
||
"71302 2023-09-12 18:59:31.613235+02:00 NaN False ... \n",
|
||
"71303 2023-09-12 18:27:34.380843+02:00 NaN False ... \n",
|
||
"71304 2022-04-14 11:41:33.738500+02:00 NaN False ... \n",
|
||
"71305 2023-09-12 17:55:17.723195+02:00 NaN False ... \n",
|
||
"71306 2023-09-12 18:27:36.904104+02:00 NaN False ... \n",
|
||
"\n",
|
||
" preferred_category preferred_supplier preferred_formula \\\n",
|
||
"0 NaN NaN NaN \n",
|
||
"1 NaN NaN NaN \n",
|
||
"2 NaN NaN NaN \n",
|
||
"3 NaN NaN NaN \n",
|
||
"4 NaN NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"71302 NaN NaN NaN \n",
|
||
"71303 NaN NaN NaN \n",
|
||
"71304 NaN NaN NaN \n",
|
||
"71305 NaN NaN NaN \n",
|
||
"71306 NaN NaN NaN \n",
|
||
"\n",
|
||
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
|
||
"0 0 NaN NaN NaN \n",
|
||
"1 0 NaN NaN NaN \n",
|
||
"2 0 NaN NaN NaN \n",
|
||
"3 0 NaN NaN NaN \n",
|
||
"4 0 NaN NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"71302 2 2018-12-31 18:56:57+01:00 NaN 35700 \n",
|
||
"71303 6 2015-12-29 14:51:46+01:00 NaN 35700 \n",
|
||
"71304 1 2018-12-31 19:12:59+01:00 NaN NaN \n",
|
||
"71305 26 2015-10-10 14:11:21+02:00 NaN 35850 \n",
|
||
"71306 2 2019-05-19 21:18:36+02:00 NaN NaN \n",
|
||
"\n",
|
||
" country age tenant_id \n",
|
||
"0 fr NaN 1556 \n",
|
||
"1 NaN NaN 1556 \n",
|
||
"2 NaN NaN 1556 \n",
|
||
"3 NaN NaN 1556 \n",
|
||
"4 NaN NaN 1556 \n",
|
||
"... ... ... ... \n",
|
||
"71302 fr 66.0 1556 \n",
|
||
"71303 fr NaN 1556 \n",
|
||
"71304 fr NaN 1556 \n",
|
||
"71305 fr 60.0 1556 \n",
|
||
"71306 fr 44.0 1556 \n",
|
||
"\n",
|
||
"[71307 rows x 43 columns]"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customersplus"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "c40c44a0-e7c2-4ad1-b700-0d6ea05d62b2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Goal: link socio-demographic characteristics to purchasing behaviour\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|