8899 lines
351 KiB
Plaintext
8899 lines
351 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "455cc769-1b3b-4fef-b395-e74a988ceed3",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Notebook Alexis"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "20eeb149-6618-4ef2-9cfd-ff062950f36c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import os\n",
|
||
"import s3fs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 275,
|
||
"id": "30494c5e-9649-4fff-8708-617544188b20",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/1',\n",
|
||
" 'bdc2324-data/10',\n",
|
||
" 'bdc2324-data/101',\n",
|
||
" 'bdc2324-data/11',\n",
|
||
" 'bdc2324-data/12',\n",
|
||
" 'bdc2324-data/13',\n",
|
||
" 'bdc2324-data/14',\n",
|
||
" 'bdc2324-data/2',\n",
|
||
" 'bdc2324-data/3',\n",
|
||
" 'bdc2324-data/4',\n",
|
||
" 'bdc2324-data/5',\n",
|
||
" 'bdc2324-data/6',\n",
|
||
" 'bdc2324-data/7',\n",
|
||
" 'bdc2324-data/8',\n",
|
||
" 'bdc2324-data/9']"
|
||
]
|
||
},
|
||
"execution_count": 275,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"BUCKET = \"bdc2324-data\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2feffee9-9f23-4caa-8a01-9e4a93abbf5d",
|
||
"metadata": {},
|
||
"source": [
|
||
"### I. Analysis of the company 8 files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f54ba449-2051-4acd-939d-d30abd5452fe",
|
||
"metadata": {},
|
||
"source": [
|
||
"This section describes the databases associated with company 8. "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 276,
|
||
"id": "f1cce705-46e1-42de-8e93-2ee15312d288",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"directory_path = '8'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 277,
|
||
"id": "82d4db0e-0cd5-49af-a4d3-f17f54b1c03c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8campaign_stats.csv\n",
|
||
"bdc2324-data/8/8campaigns.csv\n",
|
||
"bdc2324-data/8/8categories.csv\n",
|
||
"bdc2324-data/8/8countries.csv\n",
|
||
"bdc2324-data/8/8currencies.csv\n",
|
||
"bdc2324-data/8/8customer_target_mappings.csv\n",
|
||
"bdc2324-data/8/8customersplus.csv\n",
|
||
"bdc2324-data/8/8event_types.csv\n",
|
||
"bdc2324-data/8/8events.csv\n",
|
||
"bdc2324-data/8/8facilities.csv\n",
|
||
"bdc2324-data/8/8link_stats.csv\n",
|
||
"bdc2324-data/8/8pricing_formulas.csv\n",
|
||
"bdc2324-data/8/8product_packs.csv\n",
|
||
"bdc2324-data/8/8products.csv\n",
|
||
"bdc2324-data/8/8products_groups.csv\n",
|
||
"bdc2324-data/8/8purchases.csv\n",
|
||
"bdc2324-data/8/8representation_category_capacities.csv\n",
|
||
"bdc2324-data/8/8representations.csv\n",
|
||
"bdc2324-data/8/8seasons.csv\n",
|
||
"bdc2324-data/8/8suppliers.csv\n",
|
||
"bdc2324-data/8/8target_types.csv\n",
|
||
"bdc2324-data/8/8targets.csv\n",
|
||
"bdc2324-data/8/8tickets.csv\n",
|
||
"bdc2324-data/8/8type_of_categories.csv\n",
|
||
"bdc2324-data/8/8type_of_pricing_formulas.csv\n",
|
||
"bdc2324-data/8/8type_ofs.csv\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# check the files in the directory\n",
|
||
"\n",
|
||
"objects = fs.ls(f'{BUCKET}/{directory_path}')\n",
|
||
"\n",
|
||
"for file in objects:\n",
|
||
" print(file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 278,
|
||
"id": "65cb38ad-52ae-4266-85d8-c47d81b00283",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def display_databases(file_name):\n",
|
||
" \"\"\"\n",
|
||
" This function returns the file from s3 storage\n",
|
||
" \"\"\"\n",
|
||
" file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
" print(\"File path : \", file_path)\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" df = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n",
|
||
" print(\"Shape : \", df.shape)\n",
|
||
" return df\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ddd545ef-7e9f-4696-962a-115294991641",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Look at campaigns files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 279,
|
||
"id": "0214d30d-5f83-498f-867f-e67b5793b731",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8campaigns.csv\n",
|
||
"Shape : (1689, 11)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>service_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>process_id</th>\n",
|
||
" <th>report_url</th>\n",
|
||
" <th>category</th>\n",
|
||
" <th>to_be_synced</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>#LOUSFP RELANCE P'TITS LOU</td>\n",
|
||
" <td>1436</td>\n",
|
||
" <td>2022-02-01 15:22:53.564432+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.564432+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>eaa32c96f620053cf442ad32258076b9</td>\n",
|
||
" <td>2022-01-31 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>#LOUSFP BRASSERIE ACHETEURS</td>\n",
|
||
" <td>1435</td>\n",
|
||
" <td>2022-02-01 15:22:53.572592+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.572592+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1f3202d820180a39f736f20fce790de8</td>\n",
|
||
" <td>2022-01-31 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>PRESSE. LOU/SF Paris - RDV et protocole</td>\n",
|
||
" <td>1433</td>\n",
|
||
" <td>2022-02-01 15:22:53.578426+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.578426+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>b069b3415151fa7217e870017374de7c</td>\n",
|
||
" <td>2022-01-31 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>#LOUSFP ÉTUDIANTS</td>\n",
|
||
" <td>1432</td>\n",
|
||
" <td>2022-02-01 15:22:53.584235+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.584235+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>56468d5607a5aaf1604ff5e15593b003</td>\n",
|
||
" <td>2022-01-27 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>#LOUSFP P'TITS LOU</td>\n",
|
||
" <td>1431</td>\n",
|
||
" <td>2022-02-01 15:22:53.590187+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.590187+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>e11943a6031a0e6114ae69c257617980</td>\n",
|
||
" <td>2022-01-27 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name service_id \\\n",
|
||
"0 1 #LOUSFP RELANCE P'TITS LOU 1436 \n",
|
||
"1 2 #LOUSFP BRASSERIE ACHETEURS 1435 \n",
|
||
"2 3 PRESSE. LOU/SF Paris - RDV et protocole 1433 \n",
|
||
"3 4 #LOUSFP ÉTUDIANTS 1432 \n",
|
||
"4 5 #LOUSFP P'TITS LOU 1431 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2022-02-01 15:22:53.564432+01:00 2022-02-01 15:22:53.564432+01:00 \n",
|
||
"1 2022-02-01 15:22:53.572592+01:00 2022-02-01 15:22:53.572592+01:00 \n",
|
||
"2 2022-02-01 15:22:53.578426+01:00 2022-02-01 15:22:53.578426+01:00 \n",
|
||
"3 2022-02-01 15:22:53.584235+01:00 2022-02-01 15:22:53.584235+01:00 \n",
|
||
"4 2022-02-01 15:22:53.590187+01:00 2022-02-01 15:22:53.590187+01:00 \n",
|
||
"\n",
|
||
" process_id report_url category to_be_synced \\\n",
|
||
"0 NaN NaN 0 False \n",
|
||
"1 NaN NaN 0 False \n",
|
||
"2 NaN NaN 0 False \n",
|
||
"3 NaN NaN 0 False \n",
|
||
"4 NaN NaN 0 False \n",
|
||
"\n",
|
||
" identifier sent_at \n",
|
||
"0 eaa32c96f620053cf442ad32258076b9 2022-01-31 00:00:00+01:00 \n",
|
||
"1 1f3202d820180a39f736f20fce790de8 2022-01-31 00:00:00+01:00 \n",
|
||
"2 b069b3415151fa7217e870017374de7c 2022-01-31 00:00:00+01:00 \n",
|
||
"3 56468d5607a5aaf1604ff5e15593b003 2022-01-27 00:00:00+01:00 \n",
|
||
"4 e11943a6031a0e6114ae69c257617980 2022-01-27 00:00:00+01:00 "
|
||
]
|
||
},
|
||
"execution_count": 279,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"campaigns = display_databases(\"8campaigns.csv\")\n",
|
||
"campaigns.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 280,
|
||
"id": "e7982be4-2c42-4a91-be5a-329a999644cc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8campaign_stats.csv\n",
|
||
"Shape : (2527083, 8)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>campaign_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>opened_at</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" <th>delivered_at</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>161410</td>\n",
|
||
" <td>2022-02-02 18:16:07+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:16:08.616899+01:00</td>\n",
|
||
" <td>2022-02-02 17:16:08.623098+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>54228</td>\n",
|
||
" <td>2022-02-02 18:18:11+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:18:12.030260+01:00</td>\n",
|
||
" <td>2022-02-02 17:18:12.036606+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>120794</td>\n",
|
||
" <td>2022-02-02 18:18:58+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:19:00.129697+01:00</td>\n",
|
||
" <td>2022-02-02 17:19:00.134704+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>467025</td>\n",
|
||
" <td>2022-02-02 18:19:33+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:19:34.023492+01:00</td>\n",
|
||
" <td>2022-02-02 17:19:34.027570+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>142106</td>\n",
|
||
" <td>2022-02-02 18:19:35+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:19:36.553321+01:00</td>\n",
|
||
" <td>2022-02-02 17:19:36.557473+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id campaign_id customer_id opened_at sent_at \\\n",
|
||
"0 1 5 161410 2022-02-02 18:16:07+01:00 NaN \n",
|
||
"1 2 1 54228 2022-02-02 18:18:11+01:00 NaN \n",
|
||
"2 3 6 120794 2022-02-02 18:18:58+01:00 NaN \n",
|
||
"3 4 3 467025 2022-02-02 18:19:33+01:00 NaN \n",
|
||
"4 5 2 142106 2022-02-02 18:19:35+01:00 NaN \n",
|
||
"\n",
|
||
" delivered_at created_at \\\n",
|
||
"0 NaN 2022-02-02 17:16:08.616899+01:00 \n",
|
||
"1 NaN 2022-02-02 17:18:12.030260+01:00 \n",
|
||
"2 NaN 2022-02-02 17:19:00.129697+01:00 \n",
|
||
"3 NaN 2022-02-02 17:19:34.023492+01:00 \n",
|
||
"4 NaN 2022-02-02 17:19:36.553321+01:00 \n",
|
||
"\n",
|
||
" updated_at \n",
|
||
"0 2022-02-02 17:16:08.623098+01:00 \n",
|
||
"1 2022-02-02 17:18:12.036606+01:00 \n",
|
||
"2 2022-02-02 17:19:00.134704+01:00 \n",
|
||
"3 2022-02-02 17:19:34.027570+01:00 \n",
|
||
"4 2022-02-02 17:19:36.557473+01:00 "
|
||
]
|
||
},
|
||
"execution_count": 280,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"campaign_stats = display_databases(\"8campaign_stats.csv\")\n",
|
||
"campaign_stats.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e6512bc9-91f5-4fe4-a637-a4e84dc497a9",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Look at links files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "28e7c1fe-470f-4d84-87b8-a711a973500b",
|
||
"metadata": {},
|
||
"source": [
|
||
"There is no links file for this company, only the link_stats file."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 281,
|
||
"id": "e973575b-4ed6-4b23-8024-f383ac82e87c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8link_stats.csv\n",
|
||
"Shape : (108461, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>clicked_at</th>\n",
|
||
" <th>link_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-02-02 18:33:17+01:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>62137</td>\n",
|
||
" <td>2022-02-02 17:33:19.237759+01:00</td>\n",
|
||
" <td>2022-02-02 17:33:19.237759+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2022-02-02 18:33:26+01:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>556048</td>\n",
|
||
" <td>2022-02-02 17:33:28.101943+01:00</td>\n",
|
||
" <td>2022-02-02 17:33:28.101943+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2022-02-02 18:33:49+01:00</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>194456</td>\n",
|
||
" <td>2022-02-02 17:33:50.595125+01:00</td>\n",
|
||
" <td>2022-02-02 17:33:50.595125+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2022-02-02 18:34:19+01:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>194456</td>\n",
|
||
" <td>2022-02-02 17:34:20.493986+01:00</td>\n",
|
||
" <td>2022-02-02 17:34:20.493986+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2022-02-02 18:34:21+01:00</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>21571</td>\n",
|
||
" <td>2022-02-02 17:34:22.300427+01:00</td>\n",
|
||
" <td>2022-02-02 17:34:22.300427+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id clicked_at link_id customer_id \\\n",
|
||
"0 1 2022-02-02 18:33:17+01:00 1 62137 \n",
|
||
"1 2 2022-02-02 18:33:26+01:00 1 556048 \n",
|
||
"2 3 2022-02-02 18:33:49+01:00 2 194456 \n",
|
||
"3 4 2022-02-02 18:34:19+01:00 1 194456 \n",
|
||
"4 5 2022-02-02 18:34:21+01:00 2 21571 \n",
|
||
"\n",
|
||
" created_at updated_at \n",
|
||
"0 2022-02-02 17:33:19.237759+01:00 2022-02-02 17:33:19.237759+01:00 \n",
|
||
"1 2022-02-02 17:33:28.101943+01:00 2022-02-02 17:33:28.101943+01:00 \n",
|
||
"2 2022-02-02 17:33:50.595125+01:00 2022-02-02 17:33:50.595125+01:00 \n",
|
||
"3 2022-02-02 17:34:20.493986+01:00 2022-02-02 17:34:20.493986+01:00 \n",
|
||
"4 2022-02-02 17:34:22.300427+01:00 2022-02-02 17:34:22.300427+01:00 "
|
||
]
|
||
},
|
||
"execution_count": 281,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"links_stats = display_databases(\"8link_stats.csv\")\n",
|
||
"links_stats.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "8dfcca1f-1323-413f-aa8d-3ee5ce2610a8",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyse Customersplus file"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 282,
|
||
"id": "3b523575-c779-451c-a12e-a36fb4ad232c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8customersplus.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_548/2210053343.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" customersplus = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>lastname</th>\n",
|
||
" <th>firstname</th>\n",
|
||
" <th>birthdate</th>\n",
|
||
" <th>email</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>civility</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>preferred_category</th>\n",
|
||
" <th>preferred_supplier</th>\n",
|
||
" <th>preferred_formula</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>last_visiting_date</th>\n",
|
||
" <th>zipcode</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1411166</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email1411166</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-12-19 15:03:39.419371+01:00</td>\n",
|
||
" <td>2022-12-19 15:03:39.419371+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478498</td>\n",
|
||
" <td>lastname478498</td>\n",
|
||
" <td>firstname478498</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email478498</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 18:58:30.259053+02:00</td>\n",
|
||
" <td>2023-06-28 15:25:24.146689+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>473678</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email473678</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 18:44:04.119713+02:00</td>\n",
|
||
" <td>2021-09-17 18:44:04.124204+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>475026</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email475026</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 18:47:28.789618+02:00</td>\n",
|
||
" <td>2021-09-17 18:47:28.793958+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>487146</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email487146</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 19:10:24.070460+02:00</td>\n",
|
||
" <td>2021-09-17 19:10:24.076033+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 43 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id lastname firstname birthdate email \\\n",
|
||
"0 1411166 NaN NaN NaN email1411166 \n",
|
||
"1 478498 lastname478498 firstname478498 NaN email478498 \n",
|
||
"2 473678 NaN NaN NaN email473678 \n",
|
||
"3 475026 NaN NaN NaN email475026 \n",
|
||
"4 487146 NaN NaN NaN email487146 \n",
|
||
"\n",
|
||
" street_id created_at \\\n",
|
||
"0 1 2022-12-19 15:03:39.419371+01:00 \n",
|
||
"1 339167 2021-09-17 18:58:30.259053+02:00 \n",
|
||
"2 339167 2021-09-17 18:44:04.119713+02:00 \n",
|
||
"3 339167 2021-09-17 18:47:28.789618+02:00 \n",
|
||
"4 339167 2021-09-17 19:10:24.070460+02:00 \n",
|
||
"\n",
|
||
" updated_at civility is_partner ... \\\n",
|
||
"0 2022-12-19 15:03:39.419371+01:00 NaN False ... \n",
|
||
"1 2023-06-28 15:25:24.146689+02:00 NaN False ... \n",
|
||
"2 2021-09-17 18:44:04.124204+02:00 NaN False ... \n",
|
||
"3 2021-09-17 18:47:28.793958+02:00 NaN False ... \n",
|
||
"4 2021-09-17 19:10:24.076033+02:00 NaN False ... \n",
|
||
"\n",
|
||
" preferred_category preferred_supplier preferred_formula purchase_count \\\n",
|
||
"0 NaN NaN NaN 0 \n",
|
||
"1 NaN NaN NaN 0 \n",
|
||
"2 NaN NaN NaN 0 \n",
|
||
"3 NaN NaN NaN 0 \n",
|
||
"4 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" first_buying_date last_visiting_date zipcode country age tenant_id \n",
|
||
"0 NaN NaN NaN fr NaN 1594 \n",
|
||
"1 NaN NaN NaN NaN NaN 1594 \n",
|
||
"2 NaN NaN NaN NaN NaN 1594 \n",
|
||
"3 NaN NaN NaN NaN NaN 1594 \n",
|
||
"4 NaN NaN NaN NaN NaN 1594 \n",
|
||
"\n",
|
||
"[5 rows x 43 columns]"
|
||
]
|
||
},
|
||
"execution_count": 282,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8customersplus.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" customersplus = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"customersplus.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fe56785a-ed3c-4322-aafa-a630f97b836f",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyse Structures files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 283,
|
||
"id": "87d801fc-d19a-4c45-9b21-9b6d7a8451fd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8structures.csv\n",
|
||
"No structures database\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8structures.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"try:\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" structures = pd.read_csv(file_in, sep=\",\")\n",
|
||
"except:\n",
|
||
" print(\"No structures database\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b8452558-2d32-459b-91e7-f6042345e465",
|
||
"metadata": {},
|
||
"source": [
|
||
"For Stade Français, there are no structures, tags, or structure_tag_mapping databases."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "285b1422-9ca9-4afd-b752-777a54aaa677",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze Target databases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 284,
|
||
"id": "b6e4c3ea-5ccf-4aec-bd2d-79a5a1194178",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8customer_target_mappings.csv\n",
|
||
"Shape : (1449147, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>460062</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>2021-09-17 20:20:24.562734+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.562734+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>460056</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>2021-09-17 20:20:24.610139+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.610139+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>460051</td>\n",
|
||
" <td>65</td>\n",
|
||
" <td>2021-09-17 20:20:24.641381+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.641381+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>460051</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>2021-09-17 20:20:24.672238+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.672238+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>460049</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>2021-09-17 20:20:24.703110+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.703110+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_id created_at \\\n",
|
||
"0 1 460062 68 2021-09-17 20:20:24.562734+02:00 \n",
|
||
"1 2 460056 68 2021-09-17 20:20:24.610139+02:00 \n",
|
||
"2 3 460051 65 2021-09-17 20:20:24.641381+02:00 \n",
|
||
"3 4 460051 66 2021-09-17 20:20:24.672238+02:00 \n",
|
||
"4 5 460049 71 2021-09-17 20:20:24.703110+02:00 \n",
|
||
"\n",
|
||
" updated_at name extra_field \n",
|
||
"0 2021-09-17 20:20:24.562734+02:00 NaN NaN \n",
|
||
"1 2021-09-17 20:20:24.610139+02:00 NaN NaN \n",
|
||
"2 2021-09-17 20:20:24.641381+02:00 NaN NaN \n",
|
||
"3 2021-09-17 20:20:24.672238+02:00 NaN NaN \n",
|
||
"4 2021-09-17 20:20:24.703110+02:00 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 284,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8customer_target_mappings.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"try:\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" customer_targets = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n",
|
||
"except:\n",
|
||
" print(\"No such database in s3\")\n",
|
||
"\n",
|
||
"print(\"Shape : \", customer_targets.shape)\n",
|
||
"customer_targets.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 285,
|
||
"id": "6e81a35c-3c6f-403d-9ebd-e8399ecd4263",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8targets.csv\n",
|
||
"Shape : (331, 5)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>target_type_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>ÉTUDIANTS (OPÉ PANIERS) 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.879995+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.879995+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>EFFECTIF + STAFF 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.894758+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.894758+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Acheteurs LOU / USAP</td>\n",
|
||
" <td>2021-09-17 18:10:40.911969+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.911969+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Liste Compensation 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.928796+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.928796+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Partenaires 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.945476+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.945476+02:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id target_type_id name \\\n",
|
||
"0 1 1 ÉTUDIANTS (OPÉ PANIERS) 21-22 \n",
|
||
"1 2 1 EFFECTIF + STAFF 21-22 \n",
|
||
"2 3 1 Acheteurs LOU / USAP \n",
|
||
"3 4 1 Liste Compensation 21-22 \n",
|
||
"4 5 1 Partenaires 21-22 \n",
|
||
"\n",
|
||
" created_at updated_at \n",
|
||
"0 2021-09-17 18:10:40.879995+02:00 2021-09-17 18:10:40.879995+02:00 \n",
|
||
"1 2021-09-17 18:10:40.894758+02:00 2021-09-17 18:10:40.894758+02:00 \n",
|
||
"2 2021-09-17 18:10:40.911969+02:00 2021-09-17 18:10:40.911969+02:00 \n",
|
||
"3 2021-09-17 18:10:40.928796+02:00 2021-09-17 18:10:40.928796+02:00 \n",
|
||
"4 2021-09-17 18:10:40.945476+02:00 2021-09-17 18:10:40.945476+02:00 "
|
||
]
|
||
},
|
||
"execution_count": 285,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8targets.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"try:\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" targets = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n",
|
||
"except:\n",
|
||
" print(\"No such database in s3\")\n",
|
||
"\n",
|
||
"print(\"Shape : \", targets.shape)\n",
|
||
"targets.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 286,
|
||
"id": "85696d74-3b2f-4368-9045-44db5322b60d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8target_types.csv\n",
|
||
"Shape : (4, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>is_import</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" <td>2021-09-17 18:10:40.864320+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.864320+02:00</td>\n",
|
||
" <td>e34e3aa838a6eb4c41df6ed4444b796a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_dynamic_filter</td>\n",
|
||
" <td>2022-03-09 14:41:45.695407+01:00</td>\n",
|
||
" <td>2022-03-09 14:41:45.695407+01:00</td>\n",
|
||
" <td>e0f4b8693184850fefd6d2a38f10584e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" <td>2022-04-01 17:02:49.588910+02:00</td>\n",
|
||
" <td>2022-04-01 17:02:49.588910+02:00</td>\n",
|
||
" <td>fb27e81baa4debc6a4e1a8639c20e808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>manual_import</td>\n",
|
||
" <td>2022-05-06 14:26:01.923160+02:00</td>\n",
|
||
" <td>2022-05-06 14:26:01.923160+02:00</td>\n",
|
||
" <td>12213df2ce68a624e4c0070521437bac</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id is_import name created_at \\\n",
|
||
"0 1 NaN manual_static_filter 2021-09-17 18:10:40.864320+02:00 \n",
|
||
"1 2 False manual_dynamic_filter 2022-03-09 14:41:45.695407+01:00 \n",
|
||
"2 3 False manual_static_filter 2022-04-01 17:02:49.588910+02:00 \n",
|
||
"3 4 True manual_import 2022-05-06 14:26:01.923160+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-09-17 18:10:40.864320+02:00 e34e3aa838a6eb4c41df6ed4444b796a \n",
|
||
"1 2022-03-09 14:41:45.695407+01:00 e0f4b8693184850fefd6d2a38f10584e \n",
|
||
"2 2022-04-01 17:02:49.588910+02:00 fb27e81baa4debc6a4e1a8639c20e808 \n",
|
||
"3 2022-05-06 14:26:01.923160+02:00 12213df2ce68a624e4c0070521437bac "
|
||
]
|
||
},
|
||
"execution_count": 286,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8target_types.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"try:\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" target_types = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n",
|
||
"except:\n",
|
||
" print(\"No such database in s3\")\n",
|
||
"\n",
|
||
"print(\"Shape : \", target_types.shape)\n",
|
||
"target_types.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "cdc6416b-3deb-446c-8957-435745b93533",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze consumption files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f8622bd5-a5ab-403f-ab01-758aec879ee4",
|
||
"metadata": {},
|
||
"source": [
|
||
"This section covers consumptions.csv, suppliers.csv, tickets.csv and purchases.csv.\n",
|
||
"\n",
|
||
"However, there is no consumptions.csv file for company 8, so only the other three files are loaded below."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 287,
|
||
"id": "7c57529b-2ffb-4039-9795-b27c6fbd54a4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8purchases.csv\n",
|
||
"Shape : (975703, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>number</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>119609</td>\n",
|
||
" <td>2017-09-09 15:39:45.913000+02:00</td>\n",
|
||
" <td>1149</td>\n",
|
||
" <td>2021-06-29 21:52:21.816195+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:21.816195+02:00</td>\n",
|
||
" <td>193416</td>\n",
|
||
" <td>f2956e2d53321317e7c15c1cb992156c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>119610</td>\n",
|
||
" <td>2017-09-09 15:39:46.033000+02:00</td>\n",
|
||
" <td>1149</td>\n",
|
||
" <td>2021-06-29 21:52:21.817846+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:21.817846+02:00</td>\n",
|
||
" <td>193416</td>\n",
|
||
" <td>faabab441b2668a85bb484490b2166c3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>5464</td>\n",
|
||
" <td>2017-07-24 19:44:11.923000+02:00</td>\n",
|
||
" <td>1251</td>\n",
|
||
" <td>2021-06-29 21:33:45.604224+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:45.604224+02:00</td>\n",
|
||
" <td>184354</td>\n",
|
||
" <td>f63c69fa585ce4f91681f0d9ebeb770f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>119613</td>\n",
|
||
" <td>2017-09-10 11:25:45.820000+02:00</td>\n",
|
||
" <td>12558</td>\n",
|
||
" <td>2021-06-29 21:52:21.822033+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:21.822033+02:00</td>\n",
|
||
" <td>193462</td>\n",
|
||
" <td>ffce5fd8d2348eb6885d0ee9c7bd017c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1422860</td>\n",
|
||
" <td>2018-10-08 10:30:42.980000+02:00</td>\n",
|
||
" <td>17935</td>\n",
|
||
" <td>2021-07-16 04:20:55.347369+02:00</td>\n",
|
||
" <td>2021-07-16 04:20:55.347369+02:00</td>\n",
|
||
" <td>247459</td>\n",
|
||
" <td>193e41eae8ee078537107a569c0426ef</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id purchase_date customer_id \\\n",
|
||
"0 119609 2017-09-09 15:39:45.913000+02:00 1149 \n",
|
||
"1 119610 2017-09-09 15:39:46.033000+02:00 1149 \n",
|
||
"2 5464 2017-07-24 19:44:11.923000+02:00 1251 \n",
|
||
"3 119613 2017-09-10 11:25:45.820000+02:00 12558 \n",
|
||
"4 1422860 2018-10-08 10:30:42.980000+02:00 17935 \n",
|
||
"\n",
|
||
" created_at updated_at number \\\n",
|
||
"0 2021-06-29 21:52:21.816195+02:00 2021-06-29 21:52:21.816195+02:00 193416 \n",
|
||
"1 2021-06-29 21:52:21.817846+02:00 2021-06-29 21:52:21.817846+02:00 193416 \n",
|
||
"2 2021-06-29 21:33:45.604224+02:00 2021-06-29 21:33:45.604224+02:00 184354 \n",
|
||
"3 2021-06-29 21:52:21.822033+02:00 2021-06-29 21:52:21.822033+02:00 193462 \n",
|
||
"4 2021-07-16 04:20:55.347369+02:00 2021-07-16 04:20:55.347369+02:00 247459 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 f2956e2d53321317e7c15c1cb992156c \n",
|
||
"1 faabab441b2668a85bb484490b2166c3 \n",
|
||
"2 f63c69fa585ce4f91681f0d9ebeb770f \n",
|
||
"3 ffce5fd8d2348eb6885d0ee9c7bd017c \n",
|
||
"4 193e41eae8ee078537107a569c0426ef "
|
||
]
|
||
},
|
||
"execution_count": 287,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"purchases = display_databases(\"8purchases.csv\")\n",
|
||
"purchases.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 288,
|
||
"id": "903321fb-99f8-475d-b4a6-c70ec2efe190",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8tickets.csv\n",
|
||
"Shape : (2370152, 11)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>number</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>product_id</th>\n",
|
||
" <th>is_from_subscription</th>\n",
|
||
" <th>type_of</th>\n",
|
||
" <th>supplier_id</th>\n",
|
||
" <th>barcode</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>254164</td>\n",
|
||
" <td>193416_763837_650_688_326212</td>\n",
|
||
" <td>2021-06-29 21:53:14.951871+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.951871+02:00</td>\n",
|
||
" <td>119609</td>\n",
|
||
" <td>3334</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9ec3b5617fc54512acf131aa5fa26870</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>254165</td>\n",
|
||
" <td>193416_763838_650_688_326236</td>\n",
|
||
" <td>2021-06-29 21:53:14.953717+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.953717+02:00</td>\n",
|
||
" <td>119610</td>\n",
|
||
" <td>3334</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>b227c664e2574a919672683f5cc4c98e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>254168</td>\n",
|
||
" <td>193462_763921_649_687_305676</td>\n",
|
||
" <td>2021-06-29 21:53:14.958207+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.958207+02:00</td>\n",
|
||
" <td>119613</td>\n",
|
||
" <td>3432</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>28ac507ad84a30993bdfc0996fd2476b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>254169</td>\n",
|
||
" <td>193462_763922_649_687_305653</td>\n",
|
||
" <td>2021-06-29 21:53:14.959681+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.959681+02:00</td>\n",
|
||
" <td>119614</td>\n",
|
||
" <td>3268</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>131dbaeef23f5ac2271bf0266ce35476</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>254170</td>\n",
|
||
" <td>193462_763923_649_687_305630</td>\n",
|
||
" <td>2021-06-29 21:53:14.961157+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.961157+02:00</td>\n",
|
||
" <td>119615</td>\n",
|
||
" <td>3268</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1a6342ad2c213b626aa55e5374cd661a</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id number created_at \\\n",
|
||
"0 254164 193416_763837_650_688_326212 2021-06-29 21:53:14.951871+02:00 \n",
|
||
"1 254165 193416_763838_650_688_326236 2021-06-29 21:53:14.953717+02:00 \n",
|
||
"2 254168 193462_763921_649_687_305676 2021-06-29 21:53:14.958207+02:00 \n",
|
||
"3 254169 193462_763922_649_687_305653 2021-06-29 21:53:14.959681+02:00 \n",
|
||
"4 254170 193462_763923_649_687_305630 2021-06-29 21:53:14.961157+02:00 \n",
|
||
"\n",
|
||
" updated_at purchase_id product_id \\\n",
|
||
"0 2021-06-29 21:53:14.951871+02:00 119609 3334 \n",
|
||
"1 2021-06-29 21:53:14.953717+02:00 119610 3334 \n",
|
||
"2 2021-06-29 21:53:14.958207+02:00 119613 3432 \n",
|
||
"3 2021-06-29 21:53:14.959681+02:00 119614 3268 \n",
|
||
"4 2021-06-29 21:53:14.961157+02:00 119615 3268 \n",
|
||
"\n",
|
||
" is_from_subscription type_of supplier_id barcode \\\n",
|
||
"0 False 1 2 NaN \n",
|
||
"1 False 1 2 NaN \n",
|
||
"2 False 1 2 NaN \n",
|
||
"3 False 1 2 NaN \n",
|
||
"4 False 1 2 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 9ec3b5617fc54512acf131aa5fa26870 \n",
|
||
"1 b227c664e2574a919672683f5cc4c98e \n",
|
||
"2 28ac507ad84a30993bdfc0996fd2476b \n",
|
||
"3 131dbaeef23f5ac2271bf0266ce35476 \n",
|
||
"4 1a6342ad2c213b626aa55e5374cd661a "
|
||
]
|
||
},
|
||
"execution_count": 288,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tickets = display_databases(\"8tickets.csv\")\n",
|
||
"tickets.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 289,
|
||
"id": "243e6942-0233-4cd5-b32b-e005457131d2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8suppliers.csv\n",
|
||
"Shape : (16, 9)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>manually_added</th>\n",
|
||
" <th>label</th>\n",
|
||
" <th>itr</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>commission</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>152</td>\n",
|
||
" <td>plateformeceweb</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-07-16 00:02:17.805193+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.805193+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0fc934f49bfa9f1f4e6ab7e2593b6839</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>accreditation annuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-06-29 21:33:14.138349+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.138349+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fe13238540e0ff293ec8aad29aeae6c3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>68</td>\n",
|
||
" <td>abonnement parking</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-06-29 22:10:31.167367+02:00</td>\n",
|
||
" <td>2021-06-29 22:10:31.167367+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0f7defc52a97cdca533af74f4e6e5b1e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>accreditation match</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-06-29 21:33:14.142084+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.142084+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>40e19a7c4824eaad298e0107ed7e3691</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>154</td>\n",
|
||
" <td>web lnr-lou</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-07-16 00:02:17.806521+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.806521+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>b144dd617807b02e0d9002fac6c61768</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name manually_added label itr \\\n",
|
||
"0 152 plateformeceweb False NaN NaN \n",
|
||
"1 6 accreditation annuelle False NaN NaN \n",
|
||
"2 68 abonnement parking False NaN NaN \n",
|
||
"3 9 accreditation match False NaN NaN \n",
|
||
"4 154 web lnr-lou False NaN NaN \n",
|
||
"\n",
|
||
" updated_at created_at \\\n",
|
||
"0 2021-07-16 00:02:17.805193+02:00 2021-07-16 00:02:17.805193+02:00 \n",
|
||
"1 2021-06-29 21:33:14.138349+02:00 2021-06-29 21:33:14.138349+02:00 \n",
|
||
"2 2021-06-29 22:10:31.167367+02:00 2021-06-29 22:10:31.167367+02:00 \n",
|
||
"3 2021-06-29 21:33:14.142084+02:00 2021-06-29 21:33:14.142084+02:00 \n",
|
||
"4 2021-07-16 00:02:17.806521+02:00 2021-07-16 00:02:17.806521+02:00 \n",
|
||
"\n",
|
||
" commission identifier \n",
|
||
"0 NaN 0fc934f49bfa9f1f4e6ab7e2593b6839 \n",
|
||
"1 NaN fe13238540e0ff293ec8aad29aeae6c3 \n",
|
||
"2 NaN 0f7defc52a97cdca533af74f4e6e5b1e \n",
|
||
"3 NaN 40e19a7c4824eaad298e0107ed7e3691 \n",
|
||
"4 NaN b144dd617807b02e0d9002fac6c61768 "
|
||
]
|
||
},
|
||
"execution_count": 289,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"suppliers = display_databases(\"8suppliers.csv\")\n",
|
||
"suppliers.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fd8c876a-f0c5-4123-a422-c267af5f29b1",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze product file"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 290,
|
||
"id": "6b82efce-1dee-4d89-8585-28c4ad477eef",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8products.csv\n",
|
||
"Shape : (45411, 14)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>90013</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1961</td>\n",
|
||
" <td>912</td>\n",
|
||
" <td>2021-07-16 04:56:05.797551+02:00</td>\n",
|
||
" <td>2021-07-16 04:56:05.797551+02:00</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>87917</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>476e111175b1660688b7c13dade2b57e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>662</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>2021-06-29 21:33:17.389201+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:17.389201+02:00</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>640</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2c765698e9bedd48e8a3fd27dc8dbc97</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>646</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2021-06-29 21:33:17.366742+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:17.366742+02:00</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>624</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4e719148651fd7f175e3fb51bdb5d31b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5703</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>188</td>\n",
|
||
" <td>2021-06-29 21:52:09.374365+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:09.374365+02:00</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5540</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>e4d7beeb0a631e2e51e61951623ba9b1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>648</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>49</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2021-06-29 21:33:17.369471+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:17.369471+02:00</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>626</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>07a5dd9e125345b9458651ab73605255</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id amount is_full_price representation_id pricing_formula_id \\\n",
|
||
"0 90013 0.0 False 1961 912 \n",
|
||
"1 662 0.0 False 11 29 \n",
|
||
"2 646 0.0 False 46 10 \n",
|
||
"3 5703 5.0 False 7 188 \n",
|
||
"4 648 0.0 False 49 10 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2021-07-16 04:56:05.797551+02:00 2021-07-16 04:56:05.797551+02:00 \n",
|
||
"1 2021-06-29 21:33:17.389201+02:00 2021-06-29 21:33:17.389201+02:00 \n",
|
||
"2 2021-06-29 21:33:17.366742+02:00 2021-06-29 21:33:17.366742+02:00 \n",
|
||
"3 2021-06-29 21:52:09.374365+02:00 2021-06-29 21:52:09.374365+02:00 \n",
|
||
"4 2021-06-29 21:33:17.369471+02:00 2021-06-29 21:33:17.369471+02:00 \n",
|
||
"\n",
|
||
" category_id apply_price products_group_id product_pack_id extra_field \\\n",
|
||
"0 34 0.0 87917 1 NaN \n",
|
||
"1 16 0.0 640 1 NaN \n",
|
||
"2 15 0.0 624 1 NaN \n",
|
||
"3 4 0.0 5540 1 NaN \n",
|
||
"4 15 0.0 626 1 NaN \n",
|
||
"\n",
|
||
" amount_consumption identifier \n",
|
||
"0 NaN 476e111175b1660688b7c13dade2b57e \n",
|
||
"1 NaN 2c765698e9bedd48e8a3fd27dc8dbc97 \n",
|
||
"2 NaN 4e719148651fd7f175e3fb51bdb5d31b \n",
|
||
"3 NaN e4d7beeb0a631e2e51e61951623ba9b1 \n",
|
||
"4 NaN 07a5dd9e125345b9458651ab73605255 "
|
||
]
|
||
},
|
||
"execution_count": 290,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products = display_databases(\"8products.csv\")\n",
|
||
"products.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "8ad143b2-2869-4bd2-982e-688498b98727",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze pricing files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9a54e9a5-801d-4000-9e76-e792edbf7e41",
|
||
"metadata": {},
|
||
"source": [
|
||
"Meaning pricing_formulas.csv and type_of_pricing_formulas.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 291,
|
||
"id": "daf37bff-a26d-4ff5-ad50-c90f917164bd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8pricing_formulas.csv\n",
|
||
"Shape : (516, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>visite stade enfant</td>\n",
|
||
" <td>2021-06-29 21:33:14.160728+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.160728+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>bbc80e5761a0ea325f6f6a5411752659</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>3229</td>\n",
|
||
" <td>tarif bloc etudiants</td>\n",
|
||
" <td>2021-07-16 04:20:46.684601+02:00</td>\n",
|
||
" <td>2021-09-03 16:44:46.096785+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>205122cc7e96d559330972b0ec0cf35a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>42</td>\n",
|
||
" <td>invitation eiffage</td>\n",
|
||
" <td>2021-06-29 21:33:14.204483+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.204483+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>e4e6365c02e2a7b01ebe2ce8ace624f2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4379</td>\n",
|
||
" <td>invitation offre speciale</td>\n",
|
||
" <td>2021-07-16 05:21:44.984893+02:00</td>\n",
|
||
" <td>2021-07-16 05:21:44.984893+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>307817b6205535a35915a64027ee161e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2641</td>\n",
|
||
" <td>prevente reabo enfant</td>\n",
|
||
" <td>2021-07-16 03:47:40.896805+02:00</td>\n",
|
||
" <td>2021-09-03 16:08:35.304298+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>478eb63c71ba35d8d3d64c8637dafdee</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 7 visite stade enfant 2021-06-29 21:33:14.160728+02:00 \n",
|
||
"1 3229 tarif bloc etudiants 2021-07-16 04:20:46.684601+02:00 \n",
|
||
"2 42 invitation eiffage 2021-06-29 21:33:14.204483+02:00 \n",
|
||
"3 4379 invitation offre speciale 2021-07-16 05:21:44.984893+02:00 \n",
|
||
"4 2641 prevente reabo enfant 2021-07-16 03:47:40.896805+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field \\\n",
|
||
"0 2021-06-29 21:33:14.160728+02:00 NaN \n",
|
||
"1 2021-09-03 16:44:46.096785+02:00 NaN \n",
|
||
"2 2021-06-29 21:33:14.204483+02:00 NaN \n",
|
||
"3 2021-07-16 05:21:44.984893+02:00 NaN \n",
|
||
"4 2021-09-03 16:08:35.304298+02:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 bbc80e5761a0ea325f6f6a5411752659 \n",
|
||
"1 205122cc7e96d559330972b0ec0cf35a \n",
|
||
"2 e4e6365c02e2a7b01ebe2ce8ace624f2 \n",
|
||
"3 307817b6205535a35915a64027ee161e \n",
|
||
"4 478eb63c71ba35d8d3d64c8637dafdee "
|
||
]
|
||
},
|
||
"execution_count": 291,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pricing_formulas = display_databases(\"8pricing_formulas.csv\")\n",
|
||
"pricing_formulas.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 292,
|
||
"id": "cdb14488-b093-4b39-84fa-1c2b4576208f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8type_of_pricing_formulas.csv\n",
|
||
"Shape : (103, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>1021</td>\n",
|
||
" <td>2021-09-03 14:17:19.816110+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.816110+02:00</td>\n",
|
||
" <td>41047fbeb7cd3e1cb2713c608d2f786d</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>4305</td>\n",
|
||
" <td>2021-09-03 14:17:19.848088+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.848088+02:00</td>\n",
|
||
" <td>a62a4dad7d62738129244bbb5ede0747</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>4306</td>\n",
|
||
" <td>2021-09-03 14:17:19.864067+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.864067+02:00</td>\n",
|
||
" <td>c3770373e09f55412068c447736d9da3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>2021-09-03 14:17:19.880078+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.880078+02:00</td>\n",
|
||
" <td>7b7b1242ae7a8c9eb66d35d8a4348ccd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2021-09-03 14:18:03.616081+02:00</td>\n",
|
||
" <td>2021-09-03 14:18:03.616081+02:00</td>\n",
|
||
" <td>0a2b941c46b31258c03b316aa064e86a</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id type_of_id pricing_formula_id created_at \\\n",
|
||
"0 1 7 1021 2021-09-03 14:17:19.816110+02:00 \n",
|
||
"1 2 7 4305 2021-09-03 14:17:19.848088+02:00 \n",
|
||
"2 3 7 4306 2021-09-03 14:17:19.864067+02:00 \n",
|
||
"3 4 7 29 2021-09-03 14:17:19.880078+02:00 \n",
|
||
"4 5 8 10 2021-09-03 14:18:03.616081+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-09-03 14:17:19.816110+02:00 41047fbeb7cd3e1cb2713c608d2f786d \n",
|
||
"1 2021-09-03 14:17:19.848088+02:00 a62a4dad7d62738129244bbb5ede0747 \n",
|
||
"2 2021-09-03 14:17:19.864067+02:00 c3770373e09f55412068c447736d9da3 \n",
|
||
"3 2021-09-03 14:17:19.880078+02:00 7b7b1242ae7a8c9eb66d35d8a4348ccd \n",
|
||
"4 2021-09-03 14:18:03.616081+02:00 0a2b941c46b31258c03b316aa064e86a "
|
||
]
|
||
},
|
||
"execution_count": 292,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type_pricing_formulas = display_databases(\"8type_of_pricing_formulas.csv\")\n",
|
||
"type_pricing_formulas.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a084297a-4fd7-4cda-b513-7704f4244a5c",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze type of products"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "76a67ea7-8720-441e-8973-23e5d105370e",
|
||
"metadata": {},
|
||
"source": [
|
||
"Files concerned: categories.csv and type_of_categories.csv."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 293,
|
||
"id": "6582694d-5339-4f33-a943-c73033121a90",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8categories.csv\n",
|
||
"Shape : (148, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>quota</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>653</td>\n",
|
||
" <td>acces village implid</td>\n",
|
||
" <td>2021-07-16 00:04:37.181331+02:00</td>\n",
|
||
" <td>2021-07-16 00:04:37.181331+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>c447d053646a6503d3cd84d4798bf5b7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>805</td>\n",
|
||
" <td>parking organisation</td>\n",
|
||
" <td>2021-07-16 01:54:15.822407+02:00</td>\n",
|
||
" <td>2021-07-16 01:54:15.822407+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>02bf9871964345f505ad305080daec36</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>809</td>\n",
|
||
" <td>rose rouge orange</td>\n",
|
||
" <td>2021-07-16 01:54:15.825345+02:00</td>\n",
|
||
" <td>2021-07-16 01:54:15.825345+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>31fb5b57bc1a2bcd5c155fb0d9e7c0dd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2183</td>\n",
|
||
" <td>2eme catégorie j.b. centrale</td>\n",
|
||
" <td>2021-07-16 04:37:25.446835+02:00</td>\n",
|
||
" <td>2021-07-16 04:37:25.446835+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>c9eb6651caaed42b809b3f4407a847c9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>621</td>\n",
|
||
" <td>acces brasserie</td>\n",
|
||
" <td>2021-07-16 00:02:17.249701+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.249701+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>349e6a59585d78d80d46acbc6a520c50</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 653 acces village implid 2021-07-16 00:04:37.181331+02:00 \n",
|
||
"1 805 parking organisation 2021-07-16 01:54:15.822407+02:00 \n",
|
||
"2 809 rose rouge orange 2021-07-16 01:54:15.825345+02:00 \n",
|
||
"3 2183 2eme catégorie j.b. centrale 2021-07-16 04:37:25.446835+02:00 \n",
|
||
"4 621 acces brasserie 2021-07-16 00:02:17.249701+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field quota \\\n",
|
||
"0 2021-07-16 00:04:37.181331+02:00 NaN NaN \n",
|
||
"1 2021-07-16 01:54:15.822407+02:00 NaN NaN \n",
|
||
"2 2021-07-16 01:54:15.825345+02:00 NaN NaN \n",
|
||
"3 2021-07-16 04:37:25.446835+02:00 NaN NaN \n",
|
||
"4 2021-07-16 00:02:17.249701+02:00 NaN NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 c447d053646a6503d3cd84d4798bf5b7 \n",
|
||
"1 02bf9871964345f505ad305080daec36 \n",
|
||
"2 31fb5b57bc1a2bcd5c155fb0d9e7c0dd \n",
|
||
"3 c9eb6651caaed42b809b3f4407a847c9 \n",
|
||
"4 349e6a59585d78d80d46acbc6a520c50 "
|
||
]
|
||
},
|
||
"execution_count": 293,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"categories = display_databases(\"8categories.csv\")\n",
|
||
"categories.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 294,
|
||
"id": "589076df-1958-42de-9941-1aff9fa8536f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8type_of_categories.csv\n",
|
||
"Shape : (6, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2021-08-20 15:22:05.558209+02:00</td>\n",
|
||
" <td>2021-08-20 15:22:05.558209+02:00</td>\n",
|
||
" <td>af8fa6d57f6b19a7600a69e7771c7c3a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2021-09-02 17:29:32.582002+02:00</td>\n",
|
||
" <td>2021-09-02 17:29:32.582002+02:00</td>\n",
|
||
" <td>63718e7ad306912427758ddf988ad34f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2021-09-02 17:32:38.299733+02:00</td>\n",
|
||
" <td>2021-09-02 17:32:38.299733+02:00</td>\n",
|
||
" <td>5e147d4d90888df14c4584f5c6887c96</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2021-09-02 17:35:04.748993+02:00</td>\n",
|
||
" <td>2021-09-02 17:35:04.748993+02:00</td>\n",
|
||
" <td>a9dfdc3f40b41e3018933c6167fc38a5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>2021-09-02 17:35:37.396740+02:00</td>\n",
|
||
" <td>2021-09-02 17:35:37.396740+02:00</td>\n",
|
||
" <td>c05b0061d2a875adbc35d3dfa6a50a12</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id type_of_id category_id created_at \\\n",
|
||
"0 1 1 2 2021-08-20 15:22:05.558209+02:00 \n",
|
||
"1 2 2 1 2021-09-02 17:29:32.582002+02:00 \n",
|
||
"2 3 3 3 2021-09-02 17:32:38.299733+02:00 \n",
|
||
"3 4 4 4 2021-09-02 17:35:04.748993+02:00 \n",
|
||
"4 5 5 17 2021-09-02 17:35:37.396740+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-08-20 15:22:05.558209+02:00 af8fa6d57f6b19a7600a69e7771c7c3a \n",
|
||
"1 2021-09-02 17:29:32.582002+02:00 63718e7ad306912427758ddf988ad34f \n",
|
||
"2 2021-09-02 17:32:38.299733+02:00 5e147d4d90888df14c4584f5c6887c96 \n",
|
||
"3 2021-09-02 17:35:04.748993+02:00 a9dfdc3f40b41e3018933c6167fc38a5 \n",
|
||
"4 2021-09-02 17:35:37.396740+02:00 c05b0061d2a875adbc35d3dfa6a50a12 "
|
||
]
|
||
},
|
||
"execution_count": 294,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type_categories = display_databases(\"8type_of_categories.csv\")\n",
|
||
"type_categories.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3427b681-4c05-4e4e-9c2b-867ee789f98c",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze type of representations"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9381e36b-090a-44c5-a29d-3ac4c9a4431e",
|
||
"metadata": {},
|
||
"source": [
|
||
"Files concerned: representation_category_capacities.csv, representations.csv and representation_types.csv.\n",
|
||
"\n",
|
||
"However, there is no representation_types database for this company."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 295,
|
||
"id": "6f06d72a-5725-4eee-8e4c-e9ef5820f346",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8representation_category_capacities.csv\n",
|
||
"Shape : (7378, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>expected_filling</th>\n",
|
||
" <th>max_filling</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>561</td>\n",
|
||
" <td>2021-06-29 21:33:14.096827+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.096827+02:00</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>571</td>\n",
|
||
" <td>2021-06-29 21:33:14.110047+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.110047+02:00</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>9665</td>\n",
|
||
" <td>2021-07-16 00:02:17.736387+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.736387+02:00</td>\n",
|
||
" <td>1887</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>383906</td>\n",
|
||
" <td>2023-03-04 02:55:01.585418+01:00</td>\n",
|
||
" <td>2023-03-04 02:55:01.585418+01:00</td>\n",
|
||
" <td>52729</td>\n",
|
||
" <td>476</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>393</td>\n",
|
||
" <td>2021-06-29 21:33:13.876766+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.876766+02:00</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id created_at updated_at \\\n",
|
||
"0 561 2021-06-29 21:33:14.096827+02:00 2021-06-29 21:33:14.096827+02:00 \n",
|
||
"1 571 2021-06-29 21:33:14.110047+02:00 2021-06-29 21:33:14.110047+02:00 \n",
|
||
"2 9665 2021-07-16 00:02:17.736387+02:00 2021-07-16 00:02:17.736387+02:00 \n",
|
||
"3 383906 2023-03-04 02:55:01.585418+01:00 2023-03-04 02:55:01.585418+01:00 \n",
|
||
"4 393 2021-06-29 21:33:13.876766+02:00 2021-06-29 21:33:13.876766+02:00 \n",
|
||
"\n",
|
||
" representation_id category_id expected_filling max_filling \n",
|
||
"0 17 37 NaN NaN \n",
|
||
"1 14 39 NaN NaN \n",
|
||
"2 1887 8 NaN NaN \n",
|
||
"3 52729 476 NaN NaN \n",
|
||
"4 9 23 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 295,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"representation_category_capacities = display_databases(\"8representation_category_capacities.csv\")\n",
|
||
"representation_category_capacities.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 296,
|
||
"id": "bd405913-033d-4f15-a5b9-103d577baaff",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8representations.csv\n",
|
||
"Shape : (1015, 16)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>serial</th>\n",
|
||
" <th>event_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" <th>satisfaction</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>representation_type_id</th>\n",
|
||
" <th>expected_filling</th>\n",
|
||
" <th>max_filling</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>5903</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5836</td>\n",
|
||
" <td>2021-07-16 05:16:57.419565+02:00</td>\n",
|
||
" <td>2021-07-16 05:16:57.419565+02:00</td>\n",
|
||
" <td>2019-08-24 18:00:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8009c34cae4e79e3781f16f3ceeab244</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>67133</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>65652</td>\n",
|
||
" <td>2023-09-27 02:21:36.573001+02:00</td>\n",
|
||
" <td>2023-09-27 02:21:36.573001+02:00</td>\n",
|
||
" <td>2023-10-04 10:30:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4e9d3fc8d1f7bf563dc586548fe6390e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1874</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1826</td>\n",
|
||
" <td>2021-07-16 00:02:17.390274+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.390274+02:00</td>\n",
|
||
" <td>2019-09-14 18:00:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>19f666370c1fc781dff638c20ae04c8a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5904</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5837</td>\n",
|
||
" <td>2021-07-16 05:16:57.420302+02:00</td>\n",
|
||
" <td>2021-07-16 05:16:57.420302+02:00</td>\n",
|
||
" <td>2019-09-01 17:05:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4221acd3f49179f5d0b292c15d1ab8e4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4165</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4106</td>\n",
|
||
" <td>2021-07-16 03:53:05.929713+02:00</td>\n",
|
||
" <td>2021-07-16 03:53:05.929713+02:00</td>\n",
|
||
" <td>2018-10-14 14:00:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>733104286519c0614b2d45470eb180a1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id serial event_id created_at \\\n",
|
||
"0 5903 NaN 5836 2021-07-16 05:16:57.419565+02:00 \n",
|
||
"1 67133 NaN 65652 2023-09-27 02:21:36.573001+02:00 \n",
|
||
"2 1874 NaN 1826 2021-07-16 00:02:17.390274+02:00 \n",
|
||
"3 5904 NaN 5837 2021-07-16 05:16:57.420302+02:00 \n",
|
||
"4 4165 NaN 4106 2021-07-16 03:53:05.929713+02:00 \n",
|
||
"\n",
|
||
" updated_at start_date_time open \\\n",
|
||
"0 2021-07-16 05:16:57.419565+02:00 2019-08-24 18:00:00+02:00 True \n",
|
||
"1 2023-09-27 02:21:36.573001+02:00 2023-10-04 10:30:00+02:00 True \n",
|
||
"2 2021-07-16 00:02:17.390274+02:00 2019-09-14 18:00:00+02:00 True \n",
|
||
"3 2021-07-16 05:16:57.420302+02:00 2019-09-01 17:05:00+02:00 True \n",
|
||
"4 2021-07-16 03:53:05.929713+02:00 2018-10-14 14:00:00+02:00 True \n",
|
||
"\n",
|
||
" satisfaction end_date_time name is_display \\\n",
|
||
"0 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"1 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"2 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"3 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"4 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"\n",
|
||
" representation_type_id expected_filling max_filling extra_field \\\n",
|
||
"0 NaN NaN NaN NaN \n",
|
||
"1 NaN NaN NaN NaN \n",
|
||
"2 NaN NaN NaN NaN \n",
|
||
"3 NaN NaN NaN NaN \n",
|
||
"4 NaN NaN NaN NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 8009c34cae4e79e3781f16f3ceeab244 \n",
|
||
"1 4e9d3fc8d1f7bf563dc586548fe6390e \n",
|
||
"2 19f666370c1fc781dff638c20ae04c8a \n",
|
||
"3 4221acd3f49179f5d0b292c15d1ab8e4 \n",
|
||
"4 733104286519c0614b2d45470eb180a1 "
|
||
]
|
||
},
|
||
"execution_count": 296,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"representations = display_databases(\"8representations.csv\")\n",
|
||
"representations.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 297,
|
||
"id": "0f2c7ea3-6964-48fd-9411-17547b2c3a3f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#representation_type = display_databases(\"8representation_types.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a9b02406-2a69-4431-8d49-3c6bd6a5e1c7",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze type of events"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1d554266-282c-4f64-9a0f-ddcf591ec912",
|
||
"metadata": {},
|
||
"source": [
|
||
"Files concerned: events.csv, event_types.csv, seasons.csv and facilities.csv."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 298,
|
||
"id": "cba22ee2-338d-4ce1-a1e8-829a11a94bcf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8events.csv\n",
|
||
"Shape : (922, 12)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>41542</td>\n",
|
||
" <td>2022-10-29 02:54:32.756920+02:00</td>\n",
|
||
" <td>2022-10-29 02:57:35.511792+02:00</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>match lou feminin - lons</td>\n",
|
||
" <td>5588</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5588</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>40cc5a346b1af4ee7108ac28b144fb77</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>21068</td>\n",
|
||
" <td>2021-12-17 03:43:53.166446+01:00</td>\n",
|
||
" <td>2021-12-17 03:46:40.346096+01:00</td>\n",
|
||
" <td>51</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>repas brasserie lou-racing</td>\n",
|
||
" <td>2310</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2310</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>500b670b79aa592ecb06f4957800a752</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>59812</td>\n",
|
||
" <td>2023-05-26 01:45:54.321665+02:00</td>\n",
|
||
" <td>2023-05-26 01:46:01.571397+02:00</td>\n",
|
||
" <td>1501</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>parking match 2</td>\n",
|
||
" <td>10185</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>10185</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>d5f62ed879867b8b51ed7b85f1fc3ab0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3424</td>\n",
|
||
" <td>2021-07-16 03:13:06.988358+02:00</td>\n",
|
||
" <td>2021-07-16 05:33:31.321933+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>rugby + hockey sur glace</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>822b47176c355a647aa2dbdf8dfbc594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>21379</td>\n",
|
||
" <td>2021-12-23 02:37:22.948114+01:00</td>\n",
|
||
" <td>2021-12-23 02:38:20.726329+01:00</td>\n",
|
||
" <td>51</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>bloc des etudiants lou-racing</td>\n",
|
||
" <td>2562</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2562</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17b91f19c71ff6287ffc1f44af952576</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id created_at updated_at \\\n",
|
||
"0 41542 2022-10-29 02:54:32.756920+02:00 2022-10-29 02:57:35.511792+02:00 \n",
|
||
"1 21068 2021-12-17 03:43:53.166446+01:00 2021-12-17 03:46:40.346096+01:00 \n",
|
||
"2 59812 2023-05-26 01:45:54.321665+02:00 2023-05-26 01:46:01.571397+02:00 \n",
|
||
"3 3424 2021-07-16 03:13:06.988358+02:00 2021-07-16 05:33:31.321933+02:00 \n",
|
||
"4 21379 2021-12-23 02:37:22.948114+01:00 2021-12-23 02:38:20.726329+01:00 \n",
|
||
"\n",
|
||
" season_id facility_id name event_type_id \\\n",
|
||
"0 52 1 match lou feminin - lons 5588 \n",
|
||
"1 51 1 repas brasserie lou-racing 2310 \n",
|
||
"2 1501 2 parking match 2 10185 \n",
|
||
"3 1 1 rugby + hockey sur glace 5 \n",
|
||
"4 51 1 bloc des etudiants lou-racing 2562 \n",
|
||
"\n",
|
||
" manual_added is_display event_type_key_id facility_key_id \\\n",
|
||
"0 False True 5588 1 \n",
|
||
"1 False True 2310 1 \n",
|
||
"2 False True 10185 2 \n",
|
||
"3 False True 5 1 \n",
|
||
"4 False True 2562 1 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 40cc5a346b1af4ee7108ac28b144fb77 \n",
|
||
"1 500b670b79aa592ecb06f4957800a752 \n",
|
||
"2 d5f62ed879867b8b51ed7b85f1fc3ab0 \n",
|
||
"3 822b47176c355a647aa2dbdf8dfbc594 \n",
|
||
"4 17b91f19c71ff6287ffc1f44af952576 "
|
||
]
|
||
},
|
||
"execution_count": 298,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"events = display_databases(\"8events.csv\")\n",
|
||
"events.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 299,
|
||
"id": "3db00b9d-2187-4cb6-980d-8ac6ab9eb460",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8event_types.csv\n",
|
||
"Shape : (73, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>standard</td>\n",
|
||
" <td>2021-06-29 13:52:10.434850+02:00</td>\n",
|
||
" <td>2021-06-29 13:52:10.434850+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c00f0c4675b91fb8b918e4079a0b1bac</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>ptit lou</td>\n",
|
||
" <td>2021-06-29 21:33:13.000743+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.000743+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>dedd3579bc13b3ed7a90277247d9944b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>274</td>\n",
|
||
" <td>parking 19-20</td>\n",
|
||
" <td>2021-07-16 00:02:17.225410+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.225410+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>0d348caeec0b66f9d4987dfbe30e1e8b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>129</td>\n",
|
||
" <td>events 2018-2019</td>\n",
|
||
" <td>2021-06-30 01:35:18.110429+02:00</td>\n",
|
||
" <td>2021-06-30 01:35:18.110429+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>65eb39ddf8f79d28d93c2f2c53118f50</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>accreditations 2017-2018</td>\n",
|
||
" <td>2021-06-29 21:33:12.999510+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:12.999510+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>732cfdcf2065fa0005faf42793ddd76c</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 1 standard 2021-06-29 13:52:10.434850+02:00 \n",
|
||
"1 11 ptit lou 2021-06-29 21:33:13.000743+02:00 \n",
|
||
"2 274 parking 19-20 2021-07-16 00:02:17.225410+02:00 \n",
|
||
"3 129 events 2018-2019 2021-06-30 01:35:18.110429+02:00 \n",
|
||
"4 10 accreditations 2017-2018 2021-06-29 21:33:12.999510+02:00 \n",
|
||
"\n",
|
||
" updated_at fidelity_delay \\\n",
|
||
"0 2021-06-29 13:52:10.434850+02:00 36 \n",
|
||
"1 2021-06-29 21:33:13.000743+02:00 36 \n",
|
||
"2 2021-07-16 00:02:17.225410+02:00 36 \n",
|
||
"3 2021-06-30 01:35:18.110429+02:00 36 \n",
|
||
"4 2021-06-29 21:33:12.999510+02:00 36 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 c00f0c4675b91fb8b918e4079a0b1bac \n",
|
||
"1 dedd3579bc13b3ed7a90277247d9944b \n",
|
||
"2 0d348caeec0b66f9d4987dfbe30e1e8b \n",
|
||
"3 65eb39ddf8f79d28d93c2f2c53118f50 \n",
|
||
"4 732cfdcf2065fa0005faf42793ddd76c "
|
||
]
|
||
},
|
||
"execution_count": 299,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"event_types = display_databases(\"8event_types.csv\")\n",
|
||
"event_types.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 300,
|
||
"id": "cba0ee58-6280-45fe-99b3-0be09db5922b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8seasons.csv\n",
|
||
"Shape : (16, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1501</td>\n",
|
||
" <td>saison 2023-2024</td>\n",
|
||
" <td>2022-06-25 03:07:31.209270+02:00</td>\n",
|
||
" <td>2022-06-25 03:07:31.209270+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>71f5c069ce45c5e933dcc37c22507fbf</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1194</td>\n",
|
||
" <td>saison 2049-2050</td>\n",
|
||
" <td>2022-02-17 03:24:23.942691+01:00</td>\n",
|
||
" <td>2022-02-17 03:24:23.942691+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>44e20620bbc5926db2e295d38b606afd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>saison 2016-2017</td>\n",
|
||
" <td>2021-06-29 21:33:00.702563+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:00.702563+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>f9cf989d4f49300220df67ef93aa2294</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>47</td>\n",
|
||
" <td>saison 2018-2019</td>\n",
|
||
" <td>2021-06-30 01:35:15.156097+02:00</td>\n",
|
||
" <td>2021-06-30 01:35:15.156097+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>eec50c35fbf8593b364ced287335d90c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>100</td>\n",
|
||
" <td>saison 2010-2011</td>\n",
|
||
" <td>2021-07-16 00:23:27.607648+02:00</td>\n",
|
||
" <td>2021-07-16 00:23:27.607648+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7ccc51049a85e0df9b80662e45b6ddb8</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 1501 saison 2023-2024 2022-06-25 03:07:31.209270+02:00 \n",
|
||
"1 1194 saison 2049-2050 2022-02-17 03:24:23.942691+01:00 \n",
|
||
"2 2 saison 2016-2017 2021-06-29 21:33:00.702563+02:00 \n",
|
||
"3 47 saison 2018-2019 2021-06-30 01:35:15.156097+02:00 \n",
|
||
"4 100 saison 2010-2011 2021-07-16 00:23:27.607648+02:00 \n",
|
||
"\n",
|
||
" updated_at start_date_time \\\n",
|
||
"0 2022-06-25 03:07:31.209270+02:00 NaN \n",
|
||
"1 2022-02-17 03:24:23.942691+01:00 NaN \n",
|
||
"2 2021-06-29 21:33:00.702563+02:00 NaN \n",
|
||
"3 2021-06-30 01:35:15.156097+02:00 NaN \n",
|
||
"4 2021-07-16 00:23:27.607648+02:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 71f5c069ce45c5e933dcc37c22507fbf \n",
|
||
"1 44e20620bbc5926db2e295d38b606afd \n",
|
||
"2 f9cf989d4f49300220df67ef93aa2294 \n",
|
||
"3 eec50c35fbf8593b364ced287335d90c \n",
|
||
"4 7ccc51049a85e0df9b80662e45b6ddb8 "
|
||
]
|
||
},
|
||
"execution_count": 300,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"seasons = display_databases(\"8seasons.csv\")\n",
|
||
"seasons.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 301,
|
||
"id": "6fa82fd7-d6d3-4857-af24-ea573b1129d0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8facilities.csv\n",
|
||
"Shape : (5, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>74</td>\n",
|
||
" <td>plan pour campagne d'abo 2011/2012</td>\n",
|
||
" <td>2021-07-16 00:23:30.337698+02:00</td>\n",
|
||
" <td>2021-07-16 00:23:30.337698+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2e1d25d5f7e46e23c734fe0e4951390e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>accreditation</td>\n",
|
||
" <td>2021-06-29 21:33:13.018552+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.018552+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>da37a04e592cbd344142730ce05a6887</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>organisation match exterieur</td>\n",
|
||
" <td>2021-06-29 21:33:13.019878+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.019878+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8f9ee8c2e954585f7c68096d7f1cf4f1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>parking matmut stadium</td>\n",
|
||
" <td>2021-06-29 21:33:13.017165+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.017165+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aeab282982ea738674dbf5c3763a0be0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>matmut stadium</td>\n",
|
||
" <td>2021-06-29 21:33:13.004560+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.004560+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>89feffd283ebdabdc3b81fb62ea4f6f0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 74 plan pour campagne d'abo 2011/2012 2021-07-16 00:23:30.337698+02:00 \n",
|
||
"1 3 accreditation 2021-06-29 21:33:13.018552+02:00 \n",
|
||
"2 4 organisation match exterieur 2021-06-29 21:33:13.019878+02:00 \n",
|
||
"3 2 parking matmut stadium 2021-06-29 21:33:13.017165+02:00 \n",
|
||
"4 1 matmut stadium 2021-06-29 21:33:13.004560+02:00 \n",
|
||
"\n",
|
||
" updated_at street_id fixed_capacity \\\n",
|
||
"0 2021-07-16 00:23:30.337698+02:00 1 NaN \n",
|
||
"1 2021-06-29 21:33:13.018552+02:00 1 NaN \n",
|
||
"2 2021-06-29 21:33:13.019878+02:00 1 NaN \n",
|
||
"3 2021-06-29 21:33:13.017165+02:00 1 NaN \n",
|
||
"4 2021-06-29 21:33:13.004560+02:00 1 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 2e1d25d5f7e46e23c734fe0e4951390e \n",
|
||
"1 da37a04e592cbd344142730ce05a6887 \n",
|
||
"2 8f9ee8c2e954585f7c68096d7f1cf4f1 \n",
|
||
"3 aeab282982ea738674dbf5c3763a0be0 \n",
|
||
"4 89feffd283ebdabdc3b81fb62ea4f6f0 "
|
||
]
|
||
},
|
||
"execution_count": 301,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"facilities = display_databases(\"8facilities.csv\")\n",
|
||
"facilities.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c7467d41-0ded-465d-bb08-15be914a166b",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze ancillary databases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "17e9e334-0ae4-48d8-bed5-b50b4af49d5b",
|
||
"metadata": {},
|
||
"source": [
|
||
"This covers contributions.csv, contribution_sites.csv, currencies.csv, countries.csv and type_ofs.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d3ec1040-48b2-40bb-8947-920ddb4589f3",
|
||
"metadata": {},
|
||
"source": [
|
||
"## II. Identify Common Datasets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ec528a8a-df38-48e2-a1be-4a1459a80a1e",
|
||
"metadata": {},
|
||
"source": [
|
||
"From the analysis of the 8th company, we notice that some databases do not exist. Therefore, in order to construct a uniform database for all companies, we should first identify the databases that are common to all companies."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 302,
|
||
"id": "c240b811-48a6-4501-9e70-bc51d69e3ac4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"## We first build a dictionary listing all the datasets of each company\n",
|
||
"\n",
|
||
"companies = fs.ls(BUCKET)\n",
|
||
"companies_database = {}\n",
|
||
"\n",
|
||
"for company in companies:\n",
|
||
" companies_database[company.split('/')[-1]] = [file.split('/')[-1].replace(company.split('/')[-1], '') for file in fs.ls(company)] \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 303,
|
||
"id": "54057367-9df9-42f4-aa07-bf524bb76462",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of databases : 30\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Then we take the dataset list of the company with the most datasets as the list of all databases\n",
|
||
"\n",
|
||
"all_database = companies_database[max(companies_database, key=lambda x: len(companies_database[x]))]\n",
|
||
"print(\"Number of databases : \",len(all_database))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 304,
|
||
"id": "63914e20-9efc-4088-877b-edab5f225d00",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"30\n",
|
||
"23\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"## We then create the set of databases that are common to all companies\n",
|
||
"\n",
|
||
"data_in_common = set(all_database)\n",
|
||
"\n",
|
||
"print(len(data_in_common))\n",
|
||
"\n",
|
||
"for key in companies_database:\n",
|
||
" diff_database = data_in_common.symmetric_difference(companies_database[key])\n",
|
||
" data_in_common = data_in_common - diff_database\n",
|
||
"\n",
|
||
"print(len(data_in_common))\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "676d8536-7d8c-4075-a357-b8d06e501ca8",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Create Universal database"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7e460fbe-5067-4998-a1a8-9e3d07401750",
|
||
"metadata": {},
|
||
"source": [
|
||
"We will first create a procedure to clean the datasets of a company and then merge them. Hence, we will be able to replicate this procedure for all companies and create a universal database.\n",
|
||
"\n",
|
||
"Let's first build this procedure for company 1 and the datasets belonging to the products theme."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 305,
|
||
"id": "590a132a-4f57-4ea3-a282-2ef913e4b753",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"directory_path = '1'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 306,
|
||
"id": "0fbebfb7-a827-46b1-890b-86c9def7cdbb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"theme_products = [\"products.csv\" ,\"categories.csv\", \"type_of_categories.csv\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 307,
|
||
"id": "b8aa5f8f-845e-4ee5-b80d-38b7061a94a2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def remove_horodates(df):\n",
|
||
" \"\"\"\n",
|
||
" This function removes timestamp columns such as created_at and updated_at\n",
|
||
" \"\"\"\n",
|
||
" df = df.drop(columns = [\"created_at\", \"updated_at\"])\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 308,
|
||
"id": "2c478213-09ae-44ef-8c7c-125bcb571642",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def order_columns_id(df):\n",
|
||
" \"\"\"\n",
|
||
" This function puts all id columns at the beginning to make the dataset easier to read\n",
|
||
" \"\"\"\n",
|
||
" substring = 'id'\n",
|
||
" id_columns = [col for col in df.columns if substring in col]\n",
|
||
" remaining_col = [col for col in df.columns if substring not in col]\n",
|
||
" new_order = id_columns + remaining_col\n",
|
||
" return df[new_order]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 309,
|
||
"id": "327e44b0-eb99-4022-b4ca-79548072f0f0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def percent_na(df):\n",
|
||
" \"\"\"\n",
|
||
" this function returns the percentage of na for each column\n",
|
||
" \"\"\"\n",
|
||
" percent_missing = df.isna().sum() * 100 / len(df)\n",
|
||
" return percent_missing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 310,
|
||
"id": "10926def-267f-4e86-b2c9-72e27ff9a9df",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def process_df(df):\n",
|
||
" df = remove_horodates(df)\n",
|
||
" print(\"Number of columns : \", len(df.columns))\n",
|
||
" df = order_columns_id(df)\n",
|
||
" print(\"Columns : \", df.columns)\n",
|
||
" print(\"Percent of NA for each column : \", percent_na(df))\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "98ac02cb-5295-47ca-99c6-99e622c5f388",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of products.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 311,
|
||
"id": "862a7658-0602-4d94-bb58-d23774c00d32",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1products.csv\n",
|
||
"Shape : (94803, 14)\n",
|
||
"Number of columns : 14\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
|
||
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>35c88f2db8a63d7474e46eb8ca9260e7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8a179671ab198e570e6a104c4451379f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
|
||
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>ee83779ce29e67ad251e40234b426d6a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
|
||
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>d865383579314b791aa4bcf3fb418f17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
|
||
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>f1c4689bc47dee6f60b56d74b593dd46</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id amount is_full_price representation_id pricing_formula_id \\\n",
|
||
"0 10682 9.0 False 914 114 \n",
|
||
"1 478 9.5 False 273 131 \n",
|
||
"2 20873 11.5 False 275 137 \n",
|
||
"3 157142 8.0 False 82519 9 \n",
|
||
"4 1341 8.5 False 9 93 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2020-09-03 14:09:43.119798+02:00 2020-09-03 14:09:43.119798+02:00 \n",
|
||
"1 2020-09-03 13:21:22.711773+02:00 2020-09-03 13:21:22.711773+02:00 \n",
|
||
"2 2020-09-03 14:46:33.589030+02:00 2020-09-03 14:46:33.589030+02:00 \n",
|
||
"3 2022-01-28 19:29:23.525722+01:00 2022-01-28 19:29:23.525722+01:00 \n",
|
||
"4 2020-09-03 13:29:30.773089+02:00 2020-09-03 13:29:30.773089+02:00 \n",
|
||
"\n",
|
||
" category_id apply_price products_group_id product_pack_id extra_field \\\n",
|
||
"0 41 0.0 10655 1 NaN \n",
|
||
"1 1 0.0 471 1 NaN \n",
|
||
"2 1 0.0 20825 1 NaN \n",
|
||
"3 5 0.0 156773 1 NaN \n",
|
||
"4 1 0.0 1175 1 NaN \n",
|
||
"\n",
|
||
" amount_consumption identifier \n",
|
||
"0 NaN 35c88f2db8a63d7474e46eb8ca9260e7 \n",
|
||
"1 NaN 8a179671ab198e570e6a104c4451379f \n",
|
||
"2 NaN ee83779ce29e67ad251e40234b426d6a \n",
|
||
"3 NaN d865383579314b791aa4bcf3fb418f17 \n",
|
||
"4 NaN f1c4689bc47dee6f60b56d74b593dd46 "
|
||
]
|
||
},
|
||
"execution_count": 311,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products = display_databases(\"1products.csv\")\n",
|
||
"print(\"Number of columns : \", len(products.columns))\n",
|
||
"products.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 312,
|
||
"id": "f0db8c51-2792-4d49-9b1a-d98ce0d9ea28",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 12\n",
|
||
"Columns : Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n",
|
||
" 'products_group_id', 'product_pack_id', 'identifier', 'amount',\n",
|
||
" 'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>35c88f2db8a63d7474e46eb8ca9260e7</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8a179671ab198e570e6a104c4451379f</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>ee83779ce29e67ad251e40234b426d6a</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>d865383579314b791aa4bcf3fb418f17</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>f1c4689bc47dee6f60b56d74b593dd46</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id identifier \\\n",
|
||
"0 10655 1 35c88f2db8a63d7474e46eb8ca9260e7 \n",
|
||
"1 471 1 8a179671ab198e570e6a104c4451379f \n",
|
||
"2 20825 1 ee83779ce29e67ad251e40234b426d6a \n",
|
||
"3 156773 1 d865383579314b791aa4bcf3fb418f17 \n",
|
||
"4 1175 1 f1c4689bc47dee6f60b56d74b593dd46 \n",
|
||
"\n",
|
||
" amount is_full_price apply_price extra_field amount_consumption \n",
|
||
"0 9.0 False 0.0 NaN NaN \n",
|
||
"1 9.5 False 0.0 NaN NaN \n",
|
||
"2 11.5 False 0.0 NaN NaN \n",
|
||
"3 8.0 False 0.0 NaN NaN \n",
|
||
"4 8.5 False 0.0 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 312,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products = remove_horodates(products)\n",
|
||
"print(\"Number of columns : \", len(products.columns))\n",
|
||
"products = order_columns_id(products)\n",
|
||
"print(\"Columns : \", products.columns)\n",
|
||
"products.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 313,
|
||
"id": "a383474f-7da9-422c-bb69-3f0cc0b7053f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"id int64\n",
|
||
"representation_id int64\n",
|
||
"pricing_formula_id int64\n",
|
||
"category_id int64\n",
|
||
"products_group_id int64\n",
|
||
"product_pack_id int64\n",
|
||
"identifier object\n",
|
||
"amount float64\n",
|
||
"is_full_price bool\n",
|
||
"apply_price float64\n",
|
||
"extra_field float64\n",
|
||
"amount_consumption float64\n",
|
||
"dtype: object\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(products.dtypes)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 314,
|
||
"id": "460749ac-aa26-4216-8667-518546f72f72",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"id 0.0\n",
|
||
"representation_id 0.0\n",
|
||
"pricing_formula_id 0.0\n",
|
||
"category_id 0.0\n",
|
||
"products_group_id 0.0\n",
|
||
"product_pack_id 0.0\n",
|
||
"identifier 0.0\n",
|
||
"amount 0.0\n",
|
||
"is_full_price 0.0\n",
|
||
"apply_price 0.0\n",
|
||
"extra_field 100.0\n",
|
||
"amount_consumption 100.0\n",
|
||
"dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"percent_missing = products.isna().sum() * 100 / len(products)\n",
|
||
"print(percent_missing)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ebcb48ab-adad-42e5-b5d7-7275771cd200",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of categories.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 315,
|
||
"id": "3efce2b6-2d2f-4da9-98ed-1aae17da624c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1categories.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 316,
|
||
"id": "38aa39fd-58af-4fb8-98f2-4269dbaf35de",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1categories.csv\n",
|
||
"Shape : (27, 7)\n",
|
||
"Number of columns : 7\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>quota</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>en nb entrées gr</td>\n",
|
||
" <td>2020-09-03 13:21:20.019202+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:20.019202+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>849ab2791a14f5fc2bb4d87ab2b78bf6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>16</td>\n",
|
||
" <td>indiv activité enfant</td>\n",
|
||
" <td>2020-09-03 13:11:23.306968+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.306968+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>425fd2f01984cc4ba030c1be98f42c33</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>indiv activité gr</td>\n",
|
||
" <td>2020-09-03 13:21:20.029901+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:20.029901+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9244dd3738788db0d22a5d0afe687b69</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1108</td>\n",
|
||
" <td>groupe forfait adulte</td>\n",
|
||
" <td>2020-09-19 02:06:43.145697+02:00</td>\n",
|
||
" <td>2020-09-19 02:06:43.145697+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3edda20c877a93b5ff883827238eb711</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>groupe forfait entrées tr</td>\n",
|
||
" <td>2020-09-03 13:11:23.264997+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.264997+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>ff48df4b2dd5a14116bf4d280b31621e</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 30 en nb entrées gr 2020-09-03 13:21:20.019202+02:00 \n",
|
||
"1 16 indiv activité enfant 2020-09-03 13:11:23.306968+02:00 \n",
|
||
"2 39 indiv activité gr 2020-09-03 13:21:20.029901+02:00 \n",
|
||
"3 1108 groupe forfait adulte 2020-09-19 02:06:43.145697+02:00 \n",
|
||
"4 6 groupe forfait entrées tr 2020-09-03 13:11:23.264997+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field quota \\\n",
|
||
"0 2020-09-03 13:21:20.019202+02:00 NaN NaN \n",
|
||
"1 2020-09-03 13:11:23.306968+02:00 NaN NaN \n",
|
||
"2 2020-09-03 13:21:20.029901+02:00 NaN NaN \n",
|
||
"3 2020-09-19 02:06:43.145697+02:00 NaN NaN \n",
|
||
"4 2020-09-03 13:11:23.264997+02:00 NaN NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 849ab2791a14f5fc2bb4d87ab2b78bf6 \n",
|
||
"1 425fd2f01984cc4ba030c1be98f42c33 \n",
|
||
"2 9244dd3738788db0d22a5d0afe687b69 \n",
|
||
"3 3edda20c877a93b5ff883827238eb711 \n",
|
||
"4 ff48df4b2dd5a14116bf4d280b31621e "
|
||
]
|
||
},
|
||
"execution_count": 316,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 317,
|
||
"id": "99eb6d14-8b4b-4d55-8fc7-ddf2726096f4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 3.703704\n",
|
||
"extra_field 100.000000\n",
|
||
"quota 100.000000\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>quota</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>849ab2791a14f5fc2bb4d87ab2b78bf6</td>\n",
|
||
" <td>en nb entrées gr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>16</td>\n",
|
||
" <td>425fd2f01984cc4ba030c1be98f42c33</td>\n",
|
||
" <td>indiv activité enfant</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>9244dd3738788db0d22a5d0afe687b69</td>\n",
|
||
" <td>indiv activité gr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1108</td>\n",
|
||
" <td>3edda20c877a93b5ff883827238eb711</td>\n",
|
||
" <td>groupe forfait adulte</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>ff48df4b2dd5a14116bf4d280b31621e</td>\n",
|
||
" <td>groupe forfait entrées tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id identifier name \\\n",
|
||
"0 30 849ab2791a14f5fc2bb4d87ab2b78bf6 en nb entrées gr \n",
|
||
"1 16 425fd2f01984cc4ba030c1be98f42c33 indiv activité enfant \n",
|
||
"2 39 9244dd3738788db0d22a5d0afe687b69 indiv activité gr \n",
|
||
"3 1108 3edda20c877a93b5ff883827238eb711 groupe forfait adulte \n",
|
||
"4 6 ff48df4b2dd5a14116bf4d280b31621e groupe forfait entrées tr \n",
|
||
"\n",
|
||
" extra_field quota \n",
|
||
"0 NaN NaN \n",
|
||
"1 NaN NaN \n",
|
||
"2 NaN NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 317,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 318,
|
||
"id": "c5f39cc9-dff8-452c-9a3e-9f7df81a8a19",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"extra_field float64\n",
|
||
"quota float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 318,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c4cb0b37-2262-45c0-97be-b12c503016e3",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of type_of_categories.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3b4a3af9-ed12-43ec-b17e-fd425b238265",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of representation_category_capacities.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "135966fb-aab1-48d7-bb4c-39a53ee643ca",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of representations.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b480f39f-d5c7-4ded-8f64-ea8ac31f5db5",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of events.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 319,
|
||
"id": "2d52d6da-cca5-4abd-be05-2f00fd3eca8e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1events.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 320,
|
||
"id": "6cab507d-8b11-404d-9286-5cc205228af9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1events.csv\n",
|
||
"Shape : (1232, 12)\n",
|
||
"Number of columns : 12\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>2020-09-03 13:36:42.216991+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.663219+01:00</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>c1cecd093146068fd57896e254e98170</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2023-11-04 02:50:34.602462+01:00</td>\n",
|
||
" <td>2023-11-04 02:52:26.138154+01:00</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>f510a6710878d7aca36e71c54abab525</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>2020-09-03 13:29:27.944002+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.652026+01:00</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>21177fa9acad1ae2b1f595690fb853d3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>2021-07-31 11:16:42.575583+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.663219+01:00</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>962601f1eb153d45d49437f8fe839f7f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>2021-08-17 13:40:34.111923+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.663219+01:00</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>bfa22f5a2364a2dacfc45cca1c8d3215</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id created_at updated_at \\\n",
|
||
"0 192 2020-09-03 13:36:42.216991+02:00 2021-11-02 15:06:40.663219+01:00 \n",
|
||
"1 30329 2023-11-04 02:50:34.602462+01:00 2023-11-04 02:52:26.138154+01:00 \n",
|
||
"2 161 2020-09-03 13:29:27.944002+02:00 2021-11-02 15:06:40.652026+01:00 \n",
|
||
"3 5957 2021-07-31 11:16:42.575583+02:00 2021-11-02 15:06:40.663219+01:00 \n",
|
||
"4 8337 2021-08-17 13:40:34.111923+02:00 2021-11-02 15:06:40.663219+01:00 \n",
|
||
"\n",
|
||
" season_id facility_id name \\\n",
|
||
"0 16 1 frontières \n",
|
||
"1 2767 1 visite guidée une autre histoire du monde (1h00) \n",
|
||
"2 16 1 visite contée les chercheurs d'or indiv \n",
|
||
"3 582 1 we dreamt of utopia and we woke up screaming. \n",
|
||
"4 582 1 jeff koons épisodes 4 \n",
|
||
"\n",
|
||
" event_type_id manual_added is_display event_type_key_id \\\n",
|
||
"0 4 False True 4 \n",
|
||
"1 5 False True 5 \n",
|
||
"2 2 False True 2 \n",
|
||
"3 4 False True 4 \n",
|
||
"4 4 False True 4 \n",
|
||
"\n",
|
||
" facility_key_id identifier \n",
|
||
"0 1 c1cecd093146068fd57896e254e98170 \n",
|
||
"1 1 f510a6710878d7aca36e71c54abab525 \n",
|
||
"2 1 21177fa9acad1ae2b1f595690fb853d3 \n",
|
||
"3 1 962601f1eb153d45d49437f8fe839f7f \n",
|
||
"4 1 bfa22f5a2364a2dacfc45cca1c8d3215 "
|
||
]
|
||
},
|
||
"execution_count": 320,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 321,
|
||
"id": "9fe57873-8108-44c9-b8a5-f58d3cbb6d17",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 10\n",
|
||
"Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
|
||
" 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
|
||
" dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"season_id 0.000000\n",
|
||
"facility_id 0.000000\n",
|
||
"event_type_id 0.000000\n",
|
||
"event_type_key_id 0.000000\n",
|
||
"facility_key_id 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 0.974026\n",
|
||
"manual_added 0.000000\n",
|
||
"is_display 0.000000\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>c1cecd093146068fd57896e254e98170</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>f510a6710878d7aca36e71c54abab525</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>21177fa9acad1ae2b1f595690fb853d3</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>962601f1eb153d45d49437f8fe839f7f</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>bfa22f5a2364a2dacfc45cca1c8d3215</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id identifier \\\n",
|
||
"0 1 c1cecd093146068fd57896e254e98170 \n",
|
||
"1 1 f510a6710878d7aca36e71c54abab525 \n",
|
||
"2 1 21177fa9acad1ae2b1f595690fb853d3 \n",
|
||
"3 1 962601f1eb153d45d49437f8fe839f7f \n",
|
||
"4 1 bfa22f5a2364a2dacfc45cca1c8d3215 \n",
|
||
"\n",
|
||
" name manual_added is_display \n",
|
||
"0 frontières False True \n",
|
||
"1 visite guidée une autre histoire du monde (1h00) False True \n",
|
||
"2 visite contée les chercheurs d'or indiv False True \n",
|
||
"3 we dreamt of utopia and we woke up screaming. False True \n",
|
||
"4 jeff koons épisodes 4 False True "
|
||
]
|
||
},
|
||
"execution_count": 321,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 322,
|
||
"id": "7fd9e5bd-baac-4b3b-9ffb-5a9baa18399b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"season_id int64\n",
|
||
"facility_id int64\n",
|
||
"event_type_id int64\n",
|
||
"event_type_key_id int64\n",
|
||
"facility_key_id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"manual_added bool\n",
|
||
"is_display bool\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 322,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "24186efa-5908-4b03-bf52-96415fc8bd54",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of event_types.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 323,
|
||
"id": "90ab62d4-a086-4469-961c-67eefb375388",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1event_types.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 324,
|
||
"id": "58db1751-fd56-4c28-b49e-bc8235bb0dc8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1event_types.csv\n",
|
||
"Shape : (9, 6)\n",
|
||
"Number of columns : 6\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>standard</td>\n",
|
||
" <td>2020-09-03 12:24:22.574262+02:00</td>\n",
|
||
" <td>2020-09-03 12:24:22.574262+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c00f0c4675b91fb8b918e4079a0b1bac</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>66</td>\n",
|
||
" <td>package</td>\n",
|
||
" <td>2020-09-03 14:05:04.648137+02:00</td>\n",
|
||
" <td>2020-09-03 14:05:04.648137+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>efe90a8e604a7c840e88d03a67f6b7d8</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>83</td>\n",
|
||
" <td>guide multimédias</td>\n",
|
||
" <td>2020-09-03 14:15:17.252539+02:00</td>\n",
|
||
" <td>2020-09-03 14:15:17.252539+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ee14c62b3b9f6c7dd5401685a18e4460</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>2020-09-03 13:11:23.117024+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.117024+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2723</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-12-22 09:45:47.715105+01:00</td>\n",
|
||
" <td>2021-12-22 09:45:47.715105+01:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 1 standard 2020-09-03 12:24:22.574262+02:00 \n",
|
||
"1 66 package 2020-09-03 14:05:04.648137+02:00 \n",
|
||
"2 83 guide multimédias 2020-09-03 14:15:17.252539+02:00 \n",
|
||
"3 3 non défini 2020-09-03 13:11:23.117024+02:00 \n",
|
||
"4 2723 NaN 2021-12-22 09:45:47.715105+01:00 \n",
|
||
"\n",
|
||
" updated_at fidelity_delay \\\n",
|
||
"0 2020-09-03 12:24:22.574262+02:00 36 \n",
|
||
"1 2020-09-03 14:05:04.648137+02:00 36 \n",
|
||
"2 2020-09-03 14:15:17.252539+02:00 36 \n",
|
||
"3 2020-09-03 13:11:23.117024+02:00 36 \n",
|
||
"4 2021-12-22 09:45:47.715105+01:00 36 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 c00f0c4675b91fb8b918e4079a0b1bac \n",
|
||
"1 efe90a8e604a7c840e88d03a67f6b7d8 \n",
|
||
"2 ee14c62b3b9f6c7dd5401685a18e4460 \n",
|
||
"3 52ff3466787b4d538407372e5f7afe0f \n",
|
||
"4 d41d8cd98f00b204e9800998ecf8427e "
|
||
]
|
||
},
|
||
"execution_count": 324,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 325,
|
||
"id": "ac93382c-0b5f-462d-8021-0dd1e7201b8c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"fidelity_delay 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 11.111111\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c00f0c4675b91fb8b918e4079a0b1bac</td>\n",
|
||
" <td>standard</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>66</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>efe90a8e604a7c840e88d03a67f6b7d8</td>\n",
|
||
" <td>package</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>83</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ee14c62b3b9f6c7dd5401685a18e4460</td>\n",
|
||
" <td>guide multimédias</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2723</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id fidelity_delay identifier name\n",
|
||
"0 1 36 c00f0c4675b91fb8b918e4079a0b1bac standard\n",
|
||
"1 66 36 efe90a8e604a7c840e88d03a67f6b7d8 package\n",
|
||
"2 83 36 ee14c62b3b9f6c7dd5401685a18e4460 guide multimédias\n",
|
||
"3 3 36 52ff3466787b4d538407372e5f7afe0f non défini\n",
|
||
"4 2723 36 d41d8cd98f00b204e9800998ecf8427e NaN"
|
||
]
|
||
},
|
||
"execution_count": 325,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 326,
|
||
"id": "18cbd630-3c7d-49e1-932b-9460badf3758",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"fidelity_delay int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 326,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5847a441-31b9-4802-a5ae-90d8c6d6e153",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of seasons.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 327,
|
||
"id": "ae544dcc-f23d-4216-bb5b-597cc1b3765e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1seasons.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 328,
|
||
"id": "1ac97963-9208-4329-be41-d71a5797487f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1seasons.csv\n",
|
||
"Shape : (13, 6)\n",
|
||
"Number of columns : 6\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>943</td>\n",
|
||
" <td>2013</td>\n",
|
||
" <td>2021-07-29 08:55:33.282607+02:00</td>\n",
|
||
" <td>2021-07-29 08:55:33.282607+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8038da89e49ac5eabb489cfc6cea9fc1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>129</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>2020-09-03 15:13:08.105567+02:00</td>\n",
|
||
" <td>2020-09-03 15:13:08.105567+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>cee8d6b7ce52554fd70354e37bbf44a2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2015</td>\n",
|
||
" <td>2020-09-03 13:11:19.405037+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:19.405037+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>65d2ea03425887a717c435081cfc5dbb</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2016</td>\n",
|
||
" <td>2020-09-03 13:11:19.401001+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:19.401001+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>95192c98732387165bf8e396c0f2dad2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2017</td>\n",
|
||
" <td>2020-09-03 13:11:19.409005+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:19.409005+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8d8818c8e140c64c743113f563cf750f</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 943 2013 2021-07-29 08:55:33.282607+02:00 \n",
|
||
"1 129 2014 2020-09-03 15:13:08.105567+02:00 \n",
|
||
"2 3 2015 2020-09-03 13:11:19.405037+02:00 \n",
|
||
"3 2 2016 2020-09-03 13:11:19.401001+02:00 \n",
|
||
"4 4 2017 2020-09-03 13:11:19.409005+02:00 \n",
|
||
"\n",
|
||
" updated_at start_date_time \\\n",
|
||
"0 2021-07-29 08:55:33.282607+02:00 NaN \n",
|
||
"1 2020-09-03 15:13:08.105567+02:00 NaN \n",
|
||
"2 2020-09-03 13:11:19.405037+02:00 NaN \n",
|
||
"3 2020-09-03 13:11:19.401001+02:00 NaN \n",
|
||
"4 2020-09-03 13:11:19.409005+02:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 8038da89e49ac5eabb489cfc6cea9fc1 \n",
|
||
"1 cee8d6b7ce52554fd70354e37bbf44a2 \n",
|
||
"2 65d2ea03425887a717c435081cfc5dbb \n",
|
||
"3 95192c98732387165bf8e396c0f2dad2 \n",
|
||
"4 8d8818c8e140c64c743113f563cf750f "
|
||
]
|
||
},
|
||
"execution_count": 328,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 329,
|
||
"id": "b4593d46-105c-47dd-aa71-babd8e63e65b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 7.692308\n",
|
||
"start_date_time 100.000000\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>943</td>\n",
|
||
" <td>8038da89e49ac5eabb489cfc6cea9fc1</td>\n",
|
||
" <td>2013</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>129</td>\n",
|
||
" <td>cee8d6b7ce52554fd70354e37bbf44a2</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>65d2ea03425887a717c435081cfc5dbb</td>\n",
|
||
" <td>2015</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>95192c98732387165bf8e396c0f2dad2</td>\n",
|
||
" <td>2016</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8d8818c8e140c64c743113f563cf750f</td>\n",
|
||
" <td>2017</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id identifier name start_date_time\n",
|
||
"0 943 8038da89e49ac5eabb489cfc6cea9fc1 2013 NaN\n",
|
||
"1 129 cee8d6b7ce52554fd70354e37bbf44a2 2014 NaN\n",
|
||
"2 3 65d2ea03425887a717c435081cfc5dbb 2015 NaN\n",
|
||
"3 2 95192c98732387165bf8e396c0f2dad2 2016 NaN\n",
|
||
"4 4 8d8818c8e140c64c743113f563cf750f 2017 NaN"
|
||
]
|
||
},
|
||
"execution_count": 329,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 330,
|
||
"id": "5d3b096d-8e73-4514-94e5-f2dcd4d0a89c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"start_date_time float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 330,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a7b00bc7-eae6-457c-ac68-a4a55a6d1c8c",
|
||
"metadata": {},
|
||
"source": [
"#### Deep analysis of facilities.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 331,
|
||
"id": "d95ef015-d44c-4353-8761-771b910d21c9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1facilities.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 332,
|
||
"id": "ef5fe794-8df7-4f27-8554-ecdc4074ac0b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1facilities.csv\n",
|
||
"Shape : (2, 7)\n",
|
||
"Number of columns : 7\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>2020-09-03 13:16:35.293111+02:00</td>\n",
|
||
" <td>2020-09-03 13:16:35.293111+02:00</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>2020-09-03 13:11:23.133059+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.133059+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>702bd76fe3dd5dbcf118a6965a946f54</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 2 non défini 2020-09-03 13:16:35.293111+02:00 \n",
|
||
"1 1 mucem 2020-09-03 13:11:23.133059+02:00 \n",
|
||
"\n",
|
||
" updated_at street_id fixed_capacity \\\n",
|
||
"0 2020-09-03 13:16:35.293111+02:00 2 NaN \n",
|
||
"1 2020-09-03 13:11:23.133059+02:00 1 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 52ff3466787b4d538407372e5f7afe0f \n",
|
||
"1 702bd76fe3dd5dbcf118a6965a946f54 "
|
||
]
|
||
},
|
||
"execution_count": 332,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 333,
|
||
"id": "e3621201-fab9-49fd-95c1-0b9d5da76e50",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.0\n",
|
||
"street_id 0.0\n",
|
||
"identifier 0.0\n",
|
||
"name 0.0\n",
|
||
"fixed_capacity 100.0\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>702bd76fe3dd5dbcf118a6965a946f54</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id street_id identifier name fixed_capacity\n",
|
||
"0 2 2 52ff3466787b4d538407372e5f7afe0f non défini NaN\n",
|
||
"1 1 1 702bd76fe3dd5dbcf118a6965a946f54 mucem NaN"
|
||
]
|
||
},
|
||
"execution_count": 333,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 334,
|
||
"id": "1b198b92-8654-4531-a0dd-8f2e01c2e6c1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"street_id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"fixed_capacity float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 334,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ab5c4c2d-3e04-457d-a183-e173df89b650",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Merge"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 335,
|
||
"id": "43576244-c8cf-4ca0-b056-7aea1fbf0bc7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
"def process_df_2(df):\n",
"    \"\"\"Light cleaning pass applied to a table before it is merged.\n",
"\n",
"    Runs `remove_horodates` and then `order_columns_id` on `df`, printing the\n",
"    column count after the first step and the column index after the second.\n",
"    NOTE(review): both helpers are defined earlier in the notebook; presumably\n",
"    they drop the timestamp columns and move the id columns first - confirm.\n",
"\n",
"    Args:\n",
"        df: DataFrame to clean (`load_dataset` passes the output of\n",
"            `display_databases`).\n",
"\n",
"    Returns:\n",
"        The DataFrame returned by `order_columns_id`.\n",
"    \"\"\"\n",
"    without_dates = remove_horodates(df)\n",
"    print(\"Number of columns : \", len(without_dates.columns))\n",
"    reordered = order_columns_id(without_dates)\n",
"    print(\"Columns : \", reordered.columns)\n",
"    return reordered"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 336,
|
||
"id": "0fad097e-474c-4af7-b1e1-7d8dda3f09ea",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
"def load_dataset(name):\n",
"    \"\"\"Load one table and prepare it for the merges.\n",
"\n",
"    The file is read with `display_databases`, cleaned with `process_df_2`,\n",
"    and its `identifier` column is removed when the table has one.\n",
"\n",
"    Args:\n",
"        name: file name passed to `display_databases`, e.g. '1products.csv'.\n",
"\n",
"    Returns:\n",
"        The cleaned DataFrame without an `identifier` column.\n",
"    \"\"\"\n",
"    df = process_df_2(display_databases(name))\n",
"    # errors='ignore' turns the drop into a no-op for tables that have no\n",
"    # 'identifier' column, so no explicit membership test is needed.\n",
"    return df.drop(columns='identifier', errors='ignore')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b60034ef-fdd6-4640-a012-cf74c17b333f",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Products Table"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 337,
|
||
"id": "6213b1eb-c5f8-49dd-ab69-366542380e80",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
"def create_products_table():\n",
"    \"\"\"Assemble the product-level table from three source files.\n",
"\n",
"    `1products.csv` is left-joined to `1categories.csv` (products\n",
"    `category_id` = categories `id`), and the result is left-joined to\n",
"    `1type_of_categories.csv` on `category_id`.\n",
"\n",
"    Returns:\n",
"        DataFrame of products carrying the category name as `name_categories`\n",
"        and the columns of the type-of-categories table; the product key is\n",
"        exposed as `id_products`. Columns are arranged by `order_columns_id`.\n",
"    \"\"\"\n",
"    # Step 1 - products x categories\n",
"    print(\"first merge products and categories\")\n",
"    products_raw = load_dataset(\"1products.csv\")\n",
"    categories_raw = load_dataset(\"1categories.csv\")\n",
"\n",
"    # Columns that are not needed downstream are discarded before joining\n",
"    products = products_raw.drop(columns=['apply_price', 'extra_field', 'amount_consumption'])\n",
"    categories = categories_raw.drop(columns=['extra_field', 'quota'])\n",
"\n",
"    # Both frames have an 'id' column, hence the suffixes. 'name' is renamed\n",
"    # explicitly so that its origin stays obvious after the join.\n",
"    products_theme = products.merge(\n",
"        categories,\n",
"        how='left',\n",
"        left_on='category_id',\n",
"        right_on='id',\n",
"        suffixes=('_products', '_categories'),\n",
"    ).rename(columns={\"name\": \"name_categories\"})\n",
"\n",
"    # Step 2 - attach the type of each category\n",
"    print(\"Second merge products_theme and type of categories\")\n",
"    type_of_categories = load_dataset(\"1type_of_categories.csv\").drop(columns='id')\n",
"    # NOTE(review): if a category_id occurs several times in type_of_categories,\n",
"    # this left join duplicates product rows - confirm the key is unique.\n",
"    products_theme = products_theme.merge(\n",
"        type_of_categories, how='left', left_on='category_id', right_on='category_id')\n",
"\n",
"    # id_categories was the right-hand join key of step 1 and mirrors category_id\n",
"    return order_columns_id(products_theme.drop(columns=['id_categories']))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 338,
|
||
"id": "b853e020-f73d-44e8-b086-e5548ce21011",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"first merge products and categories\n",
|
||
"File path : bdc2324-data/1/1products.csv\n",
|
||
"Shape : (94803, 14)\n",
|
||
"Number of columns : 12\n",
|
||
"Columns : Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n",
|
||
" 'products_group_id', 'product_pack_id', 'identifier', 'amount',\n",
|
||
" 'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n",
|
||
" dtype='object')\n",
|
||
"File path : bdc2324-data/1/1categories.csv\n",
|
||
"Shape : (27, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
|
||
"Second merge products_theme and type of categories\n",
|
||
"File path : bdc2324-data/1/1type_of_categories.csv\n",
|
||
"Shape : (5, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'type_of_id', 'category_id', 'identifier'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_products</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv activité tr</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tr</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_products representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id type_of_id amount is_full_price \\\n",
|
||
"0 10655 1 NaN 9.0 False \n",
|
||
"1 471 1 12.0 9.5 False \n",
|
||
"2 20825 1 12.0 11.5 False \n",
|
||
"3 156773 1 NaN 8.0 False \n",
|
||
"4 1175 1 12.0 8.5 False \n",
|
||
"\n",
|
||
" name_categories \n",
|
||
"0 indiv activité tr \n",
|
||
"1 indiv entrées tp \n",
|
||
"2 indiv entrées tp \n",
|
||
"3 indiv entrées tr \n",
|
||
"4 indiv entrées tp "
|
||
]
|
||
},
|
||
"execution_count": 338,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_theme = create_products_table()\n",
|
||
"products_theme.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "8bd7b7ab-fd04-48d2-898b-48c5815457f3",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Events Table"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 339,
|
||
"id": "6ed0ad20-8315-4112-9a85-10e5f04ef852",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def create_events_table():\n",
|
||
" # first merge events and seasons : \n",
|
||
" print(\"first merge events and seasons : \")\n",
|
||
" events = load_dataset(\"1events.csv\")\n",
|
||
" seasons = load_dataset(\"1seasons.csv\")\n",
|
||
"\n",
|
||
" # Drop useless columns\n",
|
||
" events = events.drop(columns = ['manual_added', 'is_display'])\n",
|
||
" seasons = seasons.drop(columns = ['start_date_time'])\n",
|
||
" \n",
|
||
" events_theme = events.merge(seasons, how = 'left', left_on = 'season_id', right_on = 'id', suffixes=('_events', '_seasons'))\n",
|
||
"\n",
|
||
" # Secondly merge events_theme and event_types\n",
|
||
" print(\"Secondly merge events_theme and event_types : \")\n",
|
||
" event_types = load_dataset(\"1event_types.csv\")\n",
|
||
" event_types = event_types.drop(columns = ['fidelity_delay'])\n",
|
||
" \n",
|
||
" events_theme = events_theme.merge(event_types, how = 'left', left_on = 'event_type_id', right_on = 'id', suffixes=('_events', '_event_type'))\n",
|
||
" events_theme = events_theme.rename(columns = {\"name\" : \"name_event_types\"})\n",
|
||
" events_theme = events_theme.drop(columns = 'id')\n",
|
||
"\n",
|
||
" # thirdly merge events_theme and facilities\n",
|
||
" print(\"thirdly merge events_theme and facilities : \")\n",
|
||
" facilities = load_dataset(\"1facilities.csv\")\n",
|
||
" facilities = facilities.drop(columns = ['fixed_capacity'])\n",
|
||
" \n",
|
||
" events_theme = events_theme.merge(facilities, how = 'left', left_on = 'facility_id', right_on = 'id', suffixes=('_events', '_facility'))\n",
|
||
" events_theme = events_theme.rename(columns = {\"name\" : \"name_facilities\", \"id_events\" : \"event_id\"})\n",
|
||
" events_theme = events_theme.drop(columns = 'id')\n",
|
||
"\n",
|
||
" # Index cleaning\n",
|
||
" events_theme = events_theme.drop(columns = ['id_seasons'])\n",
|
||
" events_theme = order_columns_id(events_theme)\n",
|
||
" return events_theme"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 340,
|
||
"id": "98ef0636-8c45-4a23-a62a-1fbe1544f8ce",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"first merge events and seasons : \n",
|
||
"File path : bdc2324-data/1/1events.csv\n",
|
||
"Shape : (1232, 12)\n",
|
||
"Number of columns : 10\n",
|
||
"Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
|
||
" 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
|
||
" dtype='object')\n",
|
||
"File path : bdc2324-data/1/1seasons.csv\n",
|
||
"Shape : (13, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n",
|
||
"Secondly merge events_theme and event_types : \n",
|
||
"File path : bdc2324-data/1/1event_types.csv\n",
|
||
"Shape : (9, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
|
||
"thirdly merge events_theme and facilities : \n",
|
||
"File path : bdc2324-data/1/1facilities.csv\n",
|
||
"Shape : (2, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>event_id</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>2023</td>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" event_id season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id street_id \\\n",
|
||
"0 1 1 \n",
|
||
"1 1 1 \n",
|
||
"2 1 1 \n",
|
||
"3 1 1 \n",
|
||
"4 1 1 \n",
|
||
"\n",
|
||
" name_events name_seasons \\\n",
|
||
"0 frontières 2018 \n",
|
||
"1 visite guidée une autre histoire du monde (1h00) 2023 \n",
|
||
"2 visite contée les chercheurs d'or indiv 2018 \n",
|
||
"3 we dreamt of utopia and we woke up screaming. 2021 \n",
|
||
"4 jeff koons épisodes 4 2021 \n",
|
||
"\n",
|
||
" name_event_types name_facilities \n",
|
||
"0 spectacle vivant mucem \n",
|
||
"1 offre muséale groupe mucem \n",
|
||
"2 offre muséale individuel mucem \n",
|
||
"3 spectacle vivant mucem \n",
|
||
"4 spectacle vivant mucem "
|
||
]
|
||
},
|
||
"execution_count": 340,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"events_theme= create_events_table()\n",
|
||
"events_theme.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "4ad5b680-bb27-4f86-a5f3-7ff4fd1be96a",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Representations_Table"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 341,
|
||
"id": "481dddd6-80a8-4b9e-a05e-ed06fa3ed7a6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def create_representations_table():\n",
|
||
" representations = load_dataset(\"1representations.csv\")\n",
|
||
" representations = representations.drop(columns = ['serial', 'open', 'satisfaction', 'is_display', 'expected_filling',\n",
|
||
" 'max_filling', 'extra_field', 'start_date_time', 'end_date_time', 'name',\n",
|
||
" 'representation_type_id'])\n",
|
||
" \n",
|
||
" representations_capacity = load_dataset(\"1representation_category_capacities.csv\")\n",
|
||
" representations_capacity = representations_capacity.drop(columns = ['expected_filling', 'max_filling'])\n",
|
||
"\n",
|
||
" representations_theme = representations.merge(representations_capacity, how='left',\n",
|
||
" left_on='id', right_on='representation_id',\n",
|
||
" suffixes=('_representation', '_representation_cap'))\n",
|
||
" # index cleaning\n",
|
||
" representations_theme = representations_theme.drop(columns = [\"id_representation\"])\n",
|
||
" representations_theme = order_columns_id(representations_theme)\n",
|
||
" return representations_theme"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 342,
|
||
"id": "677f4ed8-ef58-45f2-9056-ede0898c6a64",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1representations.csv\n",
|
||
"Shape : (36095, 16)\n",
|
||
"Number of columns : 14\n",
|
||
"Columns : Index(['id', 'event_id', 'representation_type_id', 'identifier', 'serial',\n",
|
||
" 'start_date_time', 'open', 'satisfaction', 'end_date_time', 'name',\n",
|
||
" 'is_display', 'expected_filling', 'max_filling', 'extra_field'],\n",
|
||
" dtype='object')\n",
|
||
"File path : bdc2324-data/1/1representation_category_capacities.csv\n",
|
||
"Shape : (65241, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'representation_id', 'category_id', 'expected_filling',\n",
|
||
" 'max_filling'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>event_id</th>\n",
|
||
" <th>id_representation_cap</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>12384</td>\n",
|
||
" <td>123058</td>\n",
|
||
" <td>84820</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>37</td>\n",
|
||
" <td>2514</td>\n",
|
||
" <td>269</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>37</td>\n",
|
||
" <td>384</td>\n",
|
||
" <td>269</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>37</td>\n",
|
||
" <td>2515</td>\n",
|
||
" <td>269</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>37</td>\n",
|
||
" <td>383</td>\n",
|
||
" <td>269</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" event_id id_representation_cap representation_id category_id\n",
|
||
"0 12384 123058 84820 2\n",
|
||
"1 37 2514 269 2\n",
|
||
"2 37 384 269 5\n",
|
||
"3 37 2515 269 10\n",
|
||
"4 37 383 269 1"
|
||
]
|
||
},
|
||
"execution_count": 342,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"representation_theme = create_representations_table()\n",
|
||
"representation_theme.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e274e3cc-1b41-43e0-8412-1563166060cb",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Price Table"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 343,
|
||
"id": "c52621e7-01de-48dc-b572-2974542a8be5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1product_packs.csv\n",
|
||
"Shape : (1, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'type_of'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>type_of</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name type_of\n",
|
||
"0 1 NaN 0"
|
||
]
|
||
},
|
||
"execution_count": 343,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"product_packs = load_dataset(\"1product_packs.csv\")\n",
|
||
"product_packs.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 344,
|
||
"id": "9e4f60ab-9a2c-4090-b0c4-f9a1530b2d39",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1pricing_formulas.csv\n",
|
||
"Shape : (556, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'extra_field'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>41909</td>\n",
|
||
" <td>visite mécènes 1h30</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>502</td>\n",
|
||
" <td>entree mucem tp( expo picasso)</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>504</td>\n",
|
||
" <td>nombre de personnes cinema</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>117</td>\n",
|
||
" <td>spectacle tarif e famille tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1496</td>\n",
|
||
" <td>billet nb famille mecene 1a</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name extra_field\n",
|
||
"0 41909 visite mécènes 1h30 NaN\n",
|
||
"1 502 entree mucem tp( expo picasso) NaN\n",
|
||
"2 504 nombre de personnes cinema NaN\n",
|
||
"3 117 spectacle tarif e famille tr NaN\n",
|
||
"4 1496 billet nb famille mecene 1a NaN"
|
||
]
|
||
},
|
||
"execution_count": 344,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pricing_formula = load_dataset(\"1pricing_formulas.csv\")\n",
|
||
"pricing_formula.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 345,
|
||
"id": "247b5c45-a18a-4cfd-86b4-d3453e157bcd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1type_of_pricing_formulas.csv\n",
|
||
"Shape : (568, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'type_of_id', 'pricing_formula_id', 'identifier'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>127</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2425</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2937</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>48</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id type_of_id pricing_formula_id\n",
|
||
"0 1 1 127\n",
|
||
"1 2 1 2425\n",
|
||
"2 3 1 2937\n",
|
||
"3 4 1 48\n",
|
||
"4 5 1 7"
|
||
]
|
||
},
|
||
"execution_count": 345,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type_pricing_formula = load_dataset(\"1type_of_pricing_formulas.csv\")\n",
|
||
"type_pricing_formula.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 346,
|
||
"id": "4b48f7b3-0f06-4ef6-9355-5016af82f49c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1products_groups.csv\n",
|
||
"Shape : (92973, 9)\n",
|
||
"Number of columns : 7\n",
|
||
"Columns : Index(['id', 'category_id', 'pricing_formula_id', 'representation_id',\n",
|
||
" 'percent_price', 'max_price', 'min_price'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>percent_price</th>\n",
|
||
" <th>max_price</th>\n",
|
||
" <th>min_price</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2735</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>97</td>\n",
|
||
" <td>1534</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>14387</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>79</td>\n",
|
||
" <td>8046</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2770</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>1563</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>27179</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>119</td>\n",
|
||
" <td>14192</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id category_id pricing_formula_id representation_id percent_price \\\n",
|
||
"0 2735 8 97 1534 100.0 \n",
|
||
"1 156773 5 9 82519 100.0 \n",
|
||
"2 14387 16 79 8046 100.0 \n",
|
||
"3 2770 2 37 1563 100.0 \n",
|
||
"4 27179 13 119 14192 100.0 \n",
|
||
"\n",
|
||
" max_price min_price \n",
|
||
"0 0.0 0.0 \n",
|
||
"1 0.0 0.0 \n",
|
||
"2 0.0 0.0 \n",
|
||
"3 0.0 0.0 \n",
|
||
"4 0.0 0.0 "
|
||
]
|
||
},
|
||
"execution_count": 346,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"product_groups = load_dataset(\"1products_groups.csv\")\n",
|
||
"product_groups.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "71c26a38-6818-42df-8aee-0135681a5563",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Uniform Products theme database"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 347,
|
||
"id": "b26f4e7e-134d-4e32-a615-4b0e6bb80b25",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Products theme columns : Index(['id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
|
||
" 'products_group_id', 'product_pack_id', 'type_of_id', 'amount',\n",
|
||
" 'is_full_price', 'name_categories'],\n",
|
||
" dtype='object')\n",
|
||
"\n",
|
||
" Representation theme columns : Index(['event_id', 'id_representation_cap', 'representation_id',\n",
|
||
" 'category_id'],\n",
|
||
" dtype='object')\n",
|
||
"\n",
|
||
" Events theme columns : Index(['event_id', 'season_id', 'facility_id', 'event_type_id',\n",
|
||
" 'event_type_key_id', 'facility_key_id', 'street_id', 'name_events',\n",
|
||
" 'name_seasons', 'name_event_types', 'name_facilities'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Products theme columns : \", products_theme.columns)\n",
|
||
"print(\"\\n Representation theme columns : \", representation_theme.columns)\n",
|
||
"print(\"\\n Events theme columns : \", events_theme.columns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 348,
|
||
"id": "d40b1e3b-b1f3-4915-8ebc-6bb7856da42a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_products</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>event_id</th>\n",
|
||
" <th>id_representation_cap</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv activité tr</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>8789</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>390</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>395</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tr</td>\n",
|
||
" <td>12365</td>\n",
|
||
" <td>120199</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>21</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_products representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id type_of_id amount is_full_price \\\n",
|
||
"0 10655 1 NaN 9.0 False \n",
|
||
"1 471 1 12.0 9.5 False \n",
|
||
"2 20825 1 12.0 11.5 False \n",
|
||
"3 156773 1 NaN 8.0 False \n",
|
||
"4 1175 1 12.0 8.5 False \n",
|
||
"\n",
|
||
" name_categories event_id id_representation_cap \n",
|
||
"0 indiv activité tr 132 8789 \n",
|
||
"1 indiv entrées tp 37 390 \n",
|
||
"2 indiv entrées tp 37 395 \n",
|
||
"3 indiv entrées tr 12365 120199 \n",
|
||
"4 indiv entrées tp 8 21 "
|
||
]
|
||
},
|
||
"execution_count": 348,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_global = products_theme.merge(representation_theme, how='left',\n",
|
||
" on= [\"representation_id\", \"category_id\"])\n",
|
||
"\n",
|
||
"\n",
|
||
"products_global.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 349,
|
||
"id": "78d75a08-e959-429c-847a-7d70a2804806",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_products</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>event_id</th>\n",
|
||
" <th>id_representation_cap</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>132</td>\n",
|
||
" <td>8789</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv activité tr</td>\n",
|
||
" <td>visite-jeu \"le classico des minots\" (1h30)</td>\n",
|
||
" <td>2017</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>390</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>billet mucem picasso</td>\n",
|
||
" <td>2016</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>395</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>billet mucem picasso</td>\n",
|
||
" <td>2016</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>12365</td>\n",
|
||
" <td>120199</td>\n",
|
||
" <td>1754</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>2017</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_products representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id type_of_id event_id \\\n",
|
||
"0 10655 1 NaN 132 \n",
|
||
"1 471 1 12.0 37 \n",
|
||
"2 20825 1 12.0 37 \n",
|
||
"3 156773 1 NaN 12365 \n",
|
||
"4 1175 1 12.0 8 \n",
|
||
"\n",
|
||
" id_representation_cap season_id ... event_type_key_id facility_key_id \\\n",
|
||
"0 8789 4 ... 5 1 \n",
|
||
"1 390 2 ... 2 1 \n",
|
||
"2 395 2 ... 2 1 \n",
|
||
"3 120199 1754 ... 4 1 \n",
|
||
"4 21 4 ... 6 1 \n",
|
||
"\n",
|
||
" street_id amount is_full_price name_categories \\\n",
|
||
"0 1 9.0 False indiv activité tr \n",
|
||
"1 1 9.5 False indiv entrées tp \n",
|
||
"2 1 11.5 False indiv entrées tp \n",
|
||
"3 1 8.0 False indiv entrées tr \n",
|
||
"4 1 8.5 False indiv entrées tp \n",
|
||
"\n",
|
||
" name_events name_seasons \\\n",
|
||
"0 visite-jeu \"le classico des minots\" (1h30) 2017 \n",
|
||
"1 billet mucem picasso 2016 \n",
|
||
"2 billet mucem picasso 2016 \n",
|
||
"3 NaN NaN \n",
|
||
"4 non défini 2017 \n",
|
||
"\n",
|
||
" name_event_types name_facilities \n",
|
||
"0 offre muséale individuel mucem \n",
|
||
"1 offre muséale individuel mucem \n",
|
||
"2 offre muséale individuel mucem \n",
|
||
"3 offre muséale individuel mucem \n",
|
||
"4 non défini mucem \n",
|
||
"\n",
|
||
"[5 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 349,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_global = products_global.merge(events_theme, how='left', on='event_id',\n",
|
||
" suffixes = (\"_representation\", \"_event\"))\n",
|
||
"products_global = order_columns_id(products_global)\n",
|
||
"products_global.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 350,
|
||
"id": "4a6950e8-4818-4df2-afa9-562e0921698c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n",
|
||
" 'products_group_id', 'product_pack_id', 'type_of_id', 'event_id',\n",
|
||
" 'id_representation_cap', 'season_id', 'facility_id', 'event_type_id',\n",
|
||
" 'event_type_key_id', 'facility_key_id', 'street_id', 'amount',\n",
|
||
" 'is_full_price', 'name_categories', 'name_events', 'name_seasons',\n",
|
||
" 'name_event_types', 'name_facilities'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 350,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_global.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 351,
|
||
"id": "b18f6428-90e0-4b1b-9b8d-bad995fb6c98",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(94803, 22)"
|
||
]
|
||
},
|
||
"execution_count": 351,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_global.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c3caf2fd-178e-48e9-b95f-5798bd576f5d",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Analysis of Products_global"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 352,
|
||
"id": "33ee07a2-d871-4436-9860-9be389bc4902",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id_products 0\n",
|
||
"representation_id 0\n",
|
||
"pricing_formula_id 0\n",
|
||
"category_id 0\n",
|
||
"products_group_id 0\n",
|
||
"product_pack_id 0\n",
|
||
"type_of_id 67589\n",
|
||
"event_id 0\n",
|
||
"id_representation_cap 0\n",
|
||
"season_id 0\n",
|
||
"facility_id 0\n",
|
||
"event_type_id 0\n",
|
||
"event_type_key_id 0\n",
|
||
"facility_key_id 0\n",
|
||
"street_id 0\n",
|
||
"amount 0\n",
|
||
"is_full_price 0\n",
|
||
"name_categories 3991\n",
|
||
"name_events 46657\n",
|
||
"name_seasons 30663\n",
|
||
"name_event_types 0\n",
|
||
"name_facilities 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 352,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_global.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 353,
|
||
"id": "557fc475-4417-4d9f-8d4e-8c49bc42367f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array(['offre muséale individuel', 'non défini', 'spectacle vivant',\n",
|
||
" 'offre muséale groupe', 'formule adhésion'], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 353,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# which event types are present ?\n",
|
||
"\n",
|
||
"products_global['name_event_types'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 354,
|
||
"id": "a9b9a23c-b0de-4685-97e5-d52dd78349f5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"644"
|
||
]
|
||
},
|
||
"execution_count": 354,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# how many events ? (unique() keeps NaN, so the count below includes the missing value as one entry)\n",
|
||
"\n",
|
||
"len(products_global['name_events'].unique())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 355,
|
||
"id": "fb374c72-58ca-404d-a86b-e834a2fc4a34",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array(['indiv activité tr', 'indiv entrées tp', 'indiv entrées tr',\n",
|
||
" 'indiv prog enfant', 'indiv activité gr', 'indiv prog gr',\n",
|
||
" 'indiv activité tp', 'indiv activité enfant', 'indiv entrées gr',\n",
|
||
" 'groupe forfait entrées tr', 'groupe autonome adulte',\n",
|
||
" 'indiv prog tp', 'indiv prog tr', 'indiv entrées fa',\n",
|
||
" 'groupe forfait scolaire', 'en nb entrées tr', 'non défini', nan,\n",
|
||
" 'en nb entrées gr', 'groupe autonome entrées gr',\n",
|
||
" 'groupe forfait entrées gr', 'groupe autonome entrées tr',\n",
|
||
" 'en nb entrées tp', 'groupe autonome gr',\n",
|
||
" 'groupe autonome entrées tp', 'groupe forfait adulte',\n",
|
||
" 'groupe forfait etudiant'], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 355,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# which categories are present ? (the list includes NaN for rows without a category name; the count is in the next cell)\n",
|
||
"products_global['name_categories'].unique()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 356,
|
||
"id": "11f89771-8d50-4ef4-b34e-53e4f6b419bb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"27"
|
||
]
|
||
},
|
||
"execution_count": 356,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(products_global['category_id'].unique())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 357,
|
||
"id": "8add1ff2-b7e8-4381-90d8-d18d8660ed39",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def uniform_product_df():\n",
|
||
" \"\"\"\n",
|
||
" This function returns the uniform product dataset\n",
|
||
" \"\"\"\n",
|
||
" print(\"Products theme columns : \", products_theme.columns)\n",
|
||
" print(\"\\n Representation theme columns : \", representation_theme.columns)\n",
|
||
" print(\"\\n Events theme columns : \", events_theme.columns)\n",
|
||
"\n",
|
||
" products_global = products_theme.merge(representation_theme, how='left',\n",
|
||
" on= [\"representation_id\", \"category_id\"])\n",
|
||
" \n",
|
||
" products_global = products_global.merge(events_theme, how='left', on='event_id',\n",
|
||
" suffixes = (\"_representation\", \"_event\"))\n",
|
||
" \n",
|
||
" products_global = order_columns_id(products_global)\n",
|
||
"\n",
|
||
" # remove useless columns \n",
|
||
" products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n",
|
||
" return products_global\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b9303d35-4449-4cb6-887b-73a75f3cb868",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Investigate Customer Plus"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 358,
|
||
"id": "1fd9dcb0-164a-4fd0-90c3-2fd9e7b44016",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1customersplus.csv\n",
|
||
"Shape : (151866, 43)\n",
|
||
"Number of columns : 41\n",
|
||
"Columns : Index(['id', 'street_id', 'identifier', 'structure_id', 'mcp_contact_id',\n",
|
||
" 'fidelity', 'tenant_id', 'lastname', 'firstname', 'birthdate', 'email',\n",
|
||
" 'civility', 'is_partner', 'extra', 'deleted_at', 'reference', 'gender',\n",
|
||
" 'is_email_true', 'extra_field', 'opt_in', 'note', 'profession',\n",
|
||
" 'language', 'need_reload', 'last_buying_date', 'max_price',\n",
|
||
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
|
||
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
|
||
" 'preferred_category', 'preferred_supplier', 'preferred_formula',\n",
|
||
" 'purchase_count', 'first_buying_date', 'last_visiting_date', 'zipcode',\n",
|
||
" 'country', 'age'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>lastname</th>\n",
|
||
" <th>firstname</th>\n",
|
||
" <th>birthdate</th>\n",
|
||
" <th>email</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>total_price</th>\n",
|
||
" <th>preferred_category</th>\n",
|
||
" <th>preferred_supplier</th>\n",
|
||
" <th>preferred_formula</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>last_visiting_date</th>\n",
|
||
" <th>zipcode</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>age</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>12751</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>lastname12751</td>\n",
|
||
" <td>firstname12751</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>12825</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>lastname12825</td>\n",
|
||
" <td>firstname12825</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11261</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>lastname11261</td>\n",
|
||
" <td>firstname11261</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13071</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>lastname13071</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>653061</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email653061</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 40 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
|
||
"0 12751 2 NaN NaN 0 1311 \n",
|
||
"1 12825 2 NaN NaN 0 1311 \n",
|
||
"2 11261 2 NaN NaN 0 1311 \n",
|
||
"3 13071 2 NaN NaN 0 1311 \n",
|
||
"4 653061 10 NaN NaN 0 1311 \n",
|
||
"\n",
|
||
" lastname firstname birthdate email ... total_price \\\n",
|
||
"0 lastname12751 firstname12751 NaN NaN ... NaN \n",
|
||
"1 lastname12825 firstname12825 NaN NaN ... NaN \n",
|
||
"2 lastname11261 firstname11261 NaN NaN ... NaN \n",
|
||
"3 lastname13071 NaN NaN NaN ... NaN \n",
|
||
"4 NaN NaN NaN email653061 ... NaN \n",
|
||
"\n",
|
||
" preferred_category preferred_supplier preferred_formula purchase_count \\\n",
|
||
"0 NaN NaN NaN 0 \n",
|
||
"1 NaN NaN NaN 0 \n",
|
||
"2 NaN NaN NaN 0 \n",
|
||
"3 NaN NaN NaN 0 \n",
|
||
"4 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" first_buying_date last_visiting_date zipcode country age \n",
|
||
"0 NaN NaN NaN fr NaN \n",
|
||
"1 NaN NaN NaN fr NaN \n",
|
||
"2 NaN NaN NaN fr NaN \n",
|
||
"3 NaN NaN NaN fr NaN \n",
|
||
"4 NaN NaN NaN NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 40 columns]"
|
||
]
|
||
},
|
||
"execution_count": 358,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer_plus = load_dataset(\"1customersplus.csv\")\n",
|
||
"customer_plus.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 359,
|
||
"id": "e4a5f890-d5aa-40d7-a70c-8d8a254a5c9a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0\n",
|
||
"street_id 0\n",
|
||
"structure_id 133752\n",
|
||
"mcp_contact_id 52965\n",
|
||
"fidelity 0\n",
|
||
"tenant_id 0\n",
|
||
"lastname 66003\n",
|
||
"firstname 68333\n",
|
||
"birthdate 146429\n",
|
||
"email 13094\n",
|
||
"civility 151866\n",
|
||
"is_partner 0\n",
|
||
"extra 151866\n",
|
||
"deleted_at 151866\n",
|
||
"reference 151866\n",
|
||
"gender 0\n",
|
||
"is_email_true 0\n",
|
||
"extra_field 151866\n",
|
||
"opt_in 0\n",
|
||
"note 150960\n",
|
||
"profession 145660\n",
|
||
"language 150774\n",
|
||
"need_reload 0\n",
|
||
"last_buying_date 78444\n",
|
||
"max_price 78444\n",
|
||
"ticket_sum 0\n",
|
||
"average_price 13120\n",
|
||
"average_purchase_delay 78444\n",
|
||
"average_price_basket 78444\n",
|
||
"average_ticket_basket 78444\n",
|
||
"total_price 65324\n",
|
||
"preferred_category 151866\n",
|
||
"preferred_supplier 151866\n",
|
||
"preferred_formula 151866\n",
|
||
"purchase_count 0\n",
|
||
"first_buying_date 78444\n",
|
||
"last_visiting_date 151866\n",
|
||
"zipcode 108093\n",
|
||
"country 8291\n",
|
||
"age 146429\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 359,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer_plus.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "55ac8ec6-baa2-4199-b29a-d931260a6970",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Analysis of Customer Products"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 360,
|
||
"id": "de370d66-852e-46a1-8fb4-5c1e5756f5cd",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import matplotlib.pyplot as plt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 361,
|
||
"id": "088a1f50-cf5d-4d1a-891d-4e9df7e1c35b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>birthdate</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>profession</th>\n",
|
||
" <th>language</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>avg_amount</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>12751</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>12825</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11261</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13071</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>653061</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0 days 19:53:02.500000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 31 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id birthdate street_id is_partner gender is_email_true \\\n",
|
||
"0 12751 NaN 2 False 1 True \n",
|
||
"1 12825 NaN 2 False 2 True \n",
|
||
"2 11261 NaN 2 False 1 True \n",
|
||
"3 13071 NaN 2 False 2 True \n",
|
||
"4 653061 NaN 10 False 2 True \n",
|
||
"\n",
|
||
" opt_in structure_id profession language ... first_buying_date country \\\n",
|
||
"0 True NaN NaN NaN ... NaN fr \n",
|
||
"1 True NaN NaN NaN ... NaN fr \n",
|
||
"2 True NaN NaN NaN ... NaN fr \n",
|
||
"3 True NaN NaN NaN ... NaN fr \n",
|
||
"4 False NaN NaN NaN ... NaN NaN \n",
|
||
"\n",
|
||
" age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
|
||
"0 NaN 1311 NaN NaN NaN \n",
|
||
"1 NaN 1311 NaN NaN NaN \n",
|
||
"2 NaN 1311 NaN NaN NaN \n",
|
||
"3 NaN 1311 NaN NaN NaN \n",
|
||
"4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n",
|
||
"\n",
|
||
" event_type_id nb_tickets avg_amount \n",
|
||
"0 NaN NaN NaN \n",
|
||
"1 NaN NaN NaN \n",
|
||
"2 NaN NaN NaN \n",
|
||
"3 NaN NaN NaN \n",
|
||
"4 NaN NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 31 columns]"
|
||
]
|
||
},
|
||
"execution_count": 361,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer_product = pd.read_csv(\"customer_product.csv\")\n",
|
||
"customer_product.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 362,
|
||
"id": "bdd582af-0cf1-4e04-90ad-7165b8a36ac8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(156289, 31)\n",
|
||
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
|
||
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
|
||
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
|
||
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
|
||
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
|
||
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
|
||
" 'nb_campaigns', 'nb_campaigns_opened', 'time_to_open', 'event_type_id',\n",
|
||
" 'nb_tickets', 'avg_amount'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Shape :\n",
|
||
"print(customer_product.shape)\n",
|
||
"# columns : \n",
|
||
"print(customer_product.columns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 363,
|
||
"id": "55fa2361-ebde-4472-b8d2-521a20be766d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0\n",
|
||
"birthdate 149375\n",
|
||
"street_id 0\n",
|
||
"is_partner 0\n",
|
||
"gender 0\n",
|
||
"is_email_true 0\n",
|
||
"opt_in 0\n",
|
||
"structure_id 136867\n",
|
||
"profession 150004\n",
|
||
"language 155184\n",
|
||
"mcp_contact_id 53519\n",
|
||
"last_buying_date 78445\n",
|
||
"max_price 78445\n",
|
||
"ticket_sum 0\n",
|
||
"average_price 13120\n",
|
||
"fidelity 0\n",
|
||
"average_purchase_delay 78445\n",
|
||
"average_price_basket 78445\n",
|
||
"average_ticket_basket 78445\n",
|
||
"total_price 65325\n",
|
||
"purchase_count 0\n",
|
||
"first_buying_date 78445\n",
|
||
"country 8304\n",
|
||
"age 149375\n",
|
||
"tenant_id 0\n",
|
||
"nb_campaigns 21623\n",
|
||
"nb_campaigns_opened 21623\n",
|
||
"time_to_open 69017\n",
|
||
"event_type_id 78355\n",
|
||
"nb_tickets 78355\n",
|
||
"avg_amount 78355\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 363,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check NA\n",
|
||
"\n",
|
||
"customer_product.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 364,
|
||
"id": "2e228eb6-8cc7-4fd7-8e17-2b818095cb96",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>ticket_sum</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>avg_amount</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>12751</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>12825</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11261</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13071</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>653061</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id gender is_partner is_email_true nb_campaigns \\\n",
|
||
"0 12751 1 False True NaN \n",
|
||
"1 12825 2 False True NaN \n",
|
||
"2 11261 1 False True NaN \n",
|
||
"3 13071 2 False True NaN \n",
|
||
"4 653061 2 False True 80.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened fidelity nb_tickets ticket_sum average_price \\\n",
|
||
"0 NaN 0 NaN 0 0.0 \n",
|
||
"1 NaN 0 NaN 0 0.0 \n",
|
||
"2 NaN 0 NaN 0 0.0 \n",
|
||
"3 NaN 0 NaN 0 0.0 \n",
|
||
"4 2.0 0 NaN 0 0.0 \n",
|
||
"\n",
|
||
" avg_amount event_type_id \n",
|
||
"0 NaN NaN \n",
|
||
"1 NaN NaN \n",
|
||
"2 NaN NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 364,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"## Investigate a subset of variables\n",
|
||
"\n",
|
||
"df = customer_product[[\"customer_id\", \"gender\", \"is_partner\", \"is_email_true\",\"nb_campaigns\", \"nb_campaigns_opened\", \"fidelity\",\n",
|
||
" \"nb_tickets\", \"ticket_sum\", \"average_price\", \"avg_amount\", \"event_type_id\"]]\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 368,
|
||
"id": "80120f51-f91e-4d4d-9578-1dc88cd94754",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"shape : (156289, 12)\n",
|
||
"Nombre de customer unique : 151866\n",
|
||
"Nombre de ligne où nb_tickets est non nul : 77934\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"shape : \", df.shape)\n",
|
||
"print(\"Nombre de customer unique : \", len(df[\"customer_id\"].unique()))\n",
|
||
"print(\"Nombre de ligne où nb_tickets est non nul : \", df[\"nb_tickets\"].count())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 370,
|
||
"id": "0d56bfa9-c93c-42ee-bec2-96f0598fce2c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de consommateur unique : 73511\n",
|
||
"Nombre de type d'évènement : 4\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>ticket_sum</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>avg_amount</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>162</th>\n",
|
||
" <td>309255</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>195</th>\n",
|
||
" <td>7772</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>133.0</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2.800000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>197</th>\n",
|
||
" <td>280009</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>116.0</td>\n",
|
||
" <td>32.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11.000000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>199</th>\n",
|
||
" <td>1556</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>23.333333</td>\n",
|
||
" <td>6.150659</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>200</th>\n",
|
||
" <td>1556</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>23.333333</td>\n",
|
||
" <td>6.439463</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>156245</th>\n",
|
||
" <td>293753</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>94.0</td>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11.000000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>156246</th>\n",
|
||
" <td>293798</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>12.000000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>156281</th>\n",
|
||
" <td>295224</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>98.0</td>\n",
|
||
" <td>98</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.150659</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>156287</th>\n",
|
||
" <td>295366</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>11.000000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>156288</th>\n",
|
||
" <td>295368</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>11.000000</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>77934 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id gender is_partner is_email_true nb_campaigns \\\n",
|
||
"162 309255 2 False True 2.0 \n",
|
||
"195 7772 0 False True 133.0 \n",
|
||
"197 280009 0 False True 116.0 \n",
|
||
"199 1556 0 False True 9.0 \n",
|
||
"200 1556 0 False True 9.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"156245 293753 2 False True 94.0 \n",
|
||
"156246 293798 2 False True 7.0 \n",
|
||
"156281 295224 2 False True 10.0 \n",
|
||
"156287 295366 2 False True 5.0 \n",
|
||
"156288 295368 2 False True 5.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened fidelity nb_tickets ticket_sum average_price \\\n",
|
||
"162 2.0 0 2.0 0 0.000000 \n",
|
||
"195 19.0 0 5.0 5 2.800000 \n",
|
||
"197 32.0 1 1.0 1 11.000000 \n",
|
||
"199 8.0 1 2.0 3 23.333333 \n",
|
||
"200 8.0 1 1.0 3 23.333333 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"156245 34.0 1 1.0 1 11.000000 \n",
|
||
"156246 0.0 2 2.0 2 12.000000 \n",
|
||
"156281 0.0 1 98.0 98 0.000000 \n",
|
||
"156287 0.0 1 3.0 3 11.000000 \n",
|
||
"156288 0.0 1 2.0 2 11.000000 \n",
|
||
"\n",
|
||
" avg_amount event_type_id \n",
|
||
"162 7.762474 4.0 \n",
|
||
"195 7.762474 4.0 \n",
|
||
"197 7.762474 4.0 \n",
|
||
"199 6.150659 2.0 \n",
|
||
"200 6.439463 6.0 \n",
|
||
"... ... ... \n",
|
||
"156245 7.762474 4.0 \n",
|
||
"156246 7.762474 4.0 \n",
|
||
"156281 6.150659 2.0 \n",
|
||
"156287 7.762474 4.0 \n",
|
||
"156288 7.762474 4.0 \n",
|
||
"\n",
|
||
"[77934 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 370,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Keep only the rows of customers who bought tickets (non-null nb_tickets)\n",
|
||
"\n",
|
||
"df_purchase = df.dropna(subset= [\"nb_tickets\"])\n",
|
||
"print(\"Nombre de consommateur unique : \", len(df_purchase[\"customer_id\"].unique()))\n",
|
||
"print(\"Nombre de type d'évènement : \", len(df_purchase[\"event_type_id\"].unique()))\n",
|
||
"#print(\"Nombre de type d'évènement (nom) : \", len(df_purchase[\"name_event_types\"].unique()))\n",
|
||
"df_purchase"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 371,
|
||
"id": "0cc96c4e-f3f3-43d2-94b5-a11719f09607",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# `df_purchase` has no `name_event_types` column (it is not among the columns selected\n",
|
||
"# into `df`), so grouping on it raised KeyError. Group on `event_type_id` instead.\n",
|
||
"event_counts = df_purchase.groupby('event_type_id')['customer_id'].nunique()\n",
|
||
"\n",
|
||
"# Work on the Axes returned by pandas so the cell does not rely on `plt` being imported.\n",
|
||
"ax = event_counts.plot(kind='bar')\n",
|
||
"ax.set_xlabel(\"Type d'évènement\")\n",
|
||
"ax.set_ylabel('Nombre de consommateurs uniques')\n",
|
||
"# Trailing ';' hides the Text(...) repr of the last expression.\n",
|
||
"ax.set_title(\"Nombre de consommateurs uniques par type d'évènement\");"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e37ad847-7ea5-4afe-9c6d-e07a668d2a27",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# `df_purchase` has no `name_event_types` column (it is not among the columns selected\n",
|
||
"# into `df`), so group on `event_type_id`, which is available.\n",
|
||
"average_tickets_by_event = df_purchase.groupby('event_type_id')['nb_tickets'].mean()\n",
|
||
"\n",
|
||
"# `rot=45` tilts the x tick labels; using the returned Axes avoids depending on `plt`.\n",
|
||
"ax = average_tickets_by_event.plot(kind='bar', figsize=(8, 5), rot=45)\n",
|
||
"ax.set_xlabel(\"Type d'évènements\")\n",
|
||
"ax.set_ylabel('Nombre moyen de tickets achetés')\n",
|
||
"# Trailing ';' hides the Text(...) repr of the last expression.\n",
|
||
"ax.set_title(\"Nombre moyen de tickets achetés par Type d'évènements\");"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e02b260a-fcb7-418b-87a8-de2bb4e6eb0a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_purchase.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "26fa888d-dd33-4990-89bd-6a9c1391098b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Modelisation K-means"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "25346383-25e3-43e3-8ea4-fe05bd68900d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import matplotlib.pyplot as plt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "453c317a-b979-4fac-a9eb-60d72bf9caa8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "07bb1dbc-1543-49b0-a41c-6d5f569698d0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(156289, 41)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>birthdate</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>profession</th>\n",
|
||
" <th>language</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>avg_amount</th>\n",
|
||
" <th>nb_categories</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1705.261192</td>\n",
|
||
" <td>1456.333715</td>\n",
|
||
" <td>248.927477</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>formule adhésion</td>\n",
|
||
" <td>6.439463</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2041.274549</td>\n",
|
||
" <td>1340.308160</td>\n",
|
||
" <td>700.966389</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>6.150659</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1511.177396</td>\n",
|
||
" <td>42.428692</td>\n",
|
||
" <td>1468.748704</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>222.0</td>\n",
|
||
" <td>124.0</td>\n",
|
||
" <td>1 days 00:28:30.169354838</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1511.157396</td>\n",
|
||
" <td>1511.157396</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>formule adhésion</td>\n",
|
||
" <td>6.439463</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>1 days 04:31:01.428571428</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>797.033437</td>\n",
|
||
" <td>797.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>7.762474</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>1 days 04:31:01.428571428</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 41 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id birthdate street_id is_partner gender is_email_true \\\n",
|
||
"4 2 NaN 2 False 1 True \n",
|
||
"5 2 NaN 2 False 1 True \n",
|
||
"6 3 NaN 3 False 1 True \n",
|
||
"7 4 NaN 4 False 0 True \n",
|
||
"8 4 NaN 4 False 0 True \n",
|
||
"\n",
|
||
" opt_in structure_id profession language ... purchase_date_min \\\n",
|
||
"4 True NaN NaN NaN ... 1705.261192 \n",
|
||
"5 True NaN NaN NaN ... 2041.274549 \n",
|
||
"6 True NaN NaN NaN ... 1511.177396 \n",
|
||
"7 True NaN NaN NaN ... 1511.157396 \n",
|
||
"8 True NaN NaN NaN ... 797.033437 \n",
|
||
"\n",
|
||
" purchase_date_max time_between_purchase nb_tickets_internet \\\n",
|
||
"4 1456.333715 248.927477 0.0 \n",
|
||
"5 1340.308160 700.966389 0.0 \n",
|
||
"6 42.428692 1468.748704 6.0 \n",
|
||
"7 1511.157396 0.000000 2.0 \n",
|
||
"8 797.033437 0.000000 2.0 \n",
|
||
"\n",
|
||
" name_event_types avg_amount nb_categories nb_campaigns \\\n",
|
||
"4 formule adhésion 6.439463 1.0 4.0 \n",
|
||
"5 offre muséale individuel 6.150659 1.0 4.0 \n",
|
||
"6 spectacle vivant 7.762474 1.0 222.0 \n",
|
||
"7 formule adhésion 6.439463 1.0 7.0 \n",
|
||
"8 spectacle vivant 7.762474 1.0 7.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open \n",
|
||
"4 NaN NaN \n",
|
||
"5 NaN NaN \n",
|
||
"6 124.0 1 days 00:28:30.169354838 \n",
|
||
"7 7.0 1 days 04:31:01.428571428 \n",
|
||
"8 7.0 1 days 04:31:01.428571428 \n",
|
||
"\n",
|
||
"[5 rows x 41 columns]"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Load the temporary data\n",
|
||
"BUCKET = \"projet-bdc2324-team1\"\n",
|
||
"FILE_KEY_S3 = \"0_Temp/Company 1 - customer_event.csv\"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" customer = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"print(customer.shape)\n",
|
||
"# Remove customer 1 as outlier\n",
|
||
"\n",
|
||
"customer = customer[customer['customer_id']!=1]\n",
|
||
"customer.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "d1f2608a-911c-440e-b7cd-5cd6684b0de3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
|
||
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
|
||
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
|
||
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
|
||
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
|
||
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
|
||
" 'event_type_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
|
||
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
|
||
" 'name_event_types', 'avg_amount', 'nb_categories', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened', 'time_to_open'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"id": "fe5557f9-b629-46bf-a606-a8db2158933c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([ 6., 2., 4., 5., nan])"
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer['event_type_id'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "b1681828-165b-43be-8e42-04af052c132a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>formule adhésion</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>40</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" event_type_id name_event_types\n",
|
||
"4 6.0 formule adhésion\n",
|
||
"5 2.0 offre muséale individuel\n",
|
||
"6 4.0 spectacle vivant\n",
|
||
"16 5.0 offre muséale groupe\n",
|
||
"40 NaN NaN"
|
||
]
|
||
},
|
||
"execution_count": 53,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customer[['event_type_id', 'name_event_types']].drop_duplicates()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "daef46cd-f6a5-4282-ac0a-83fde277edec",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"shape : (21388, 41)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# event_type_id 2 corresponds to 'offre muséale individuel' (see the id/name table displayed above).\n",
|
||
"# .copy() yields an independent frame, so adding columns to it later does not\n",
|
||
"# trigger SettingWithCopyWarning.\n",
|
||
"customer_event1 = customer[customer['event_type_id']==2].copy()\n",
|
||
"print(\"shape : \", customer_event1.shape)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "cd1103b4-27b5-4be1-8451-43172df2d33e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Percentage of campaigns opened: 100 * nb_campaigns_opened / nb_campaigns.\n",
|
||
"# `assign` returns a new DataFrame instead of writing into a slice of `customer`,\n",
|
||
"# which is what raised SettingWithCopyWarning with the direct column assignment.\n",
|
||
"customer_event1 = customer_event1.assign(\n",
|
||
"    percent_campaign_opened=100 * (customer_event1[\"nb_campaigns_opened\"]\n",
|
||
"                                   / customer_event1[\"nb_campaigns\"])\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "ec19582b-7d84-422c-8f53-9b0eb1bea21e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"customer_event1 = customer_event1.fillna(0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"id": "e34437e6-a57d-4d10-ac62-5c43cdda6892",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAIhCAYAAABjbF0dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACRBUlEQVR4nOzdeVxUZfvH8c+AMIALoqSIkthm7pqWqRlugCmamlkupJm0mBkuafpULqWWmln6lO2WS7aYPpZm4JJK4pJKhUu2uItphuAKCOf3x/kxOaLG4MAg832/XvOSOeeaM9fM/eBzeXef67YYhmEgIiIiIiIO8XB1AiIiIiIi1yIV0iIiIiIiBaBCWkRERESkAFRIi4iIiIgUgAppEREREZECUCEtIiIiIlIAKqRFRERERApAhbSIiIiISAGokBYRERERKQAV0iLiMrNnz8ZisVz28d1339liQ0ND6devn+35d999h8Vi4Ysvvij6xPNh7NixWCwWPDw8+OOPP/KcP336NOXKlcNisdh9LkdMnDiRxYsX5zme+73+8MMPBbquI1q1akWrVq3+Ne7vv//mwQcfpFKlSlgsFrp06VLouWVlZfHWW2/RrFkz/P398fX1pVatWjz77LMcP368wNc9fPgwY8eOJSkpKc+53HF3hdzfiQt/by6lKP/3IVLSlXJ1AiIiH374Ibfeemue47Vr13ZBNs5VpkwZPvzwQ1588UW7459//jlZWVl4eXkV+NoTJ06ke/fuRVKUXq0XX3yRRYsW8cEHH3DjjTdSoUKFQn2/M2fO0KFDBxISEnj00Ud5/vnn8fX1JTExkalTpzJ//nzi4+OpWbOmw9c+fPgw48aNIzQ0lIYNG9qdGzBgAO3bt3fSpxCR4k6FtIi4XN26dWnSpImr0ygUDzzwAB999BHjxo3Dw+Of/wj4/vvv07VrV5YsWeLC7IpOcnIyN954I71793bK9QzD4Ny5c/j6+l7y/JAhQ1izZg0LFizggQcesB1v3bo13bt354477uC+++7jxx9/xNPT0yk5AVSrVo1q1ao57XoiUrxpaYeIXNPOnTvH0KFDCQoKwtfXl7CwMLZt25YnbsmSJTRr1gw/Pz/Kli1LeHg4iYmJtvPbt2/HYrHw+eef245t2bIFi8VCnTp17K7VuXNnGjdunK/8+vfvz4EDB4iPj7cd2717NwkJCfTv3/+Sr0lPT2f48OHUqFEDb29vqlatSmxsLKdPn7bFWCwWTp8+zUcffWRbCnPxEouTJ0/yxBNPEBgYSMWKFenWrRuHDx+2i8nJyWHy5MnceuutWK1WKlWqxEMPPcTBgwft4gzDYPLkyVSvXh0fHx9uu+02vvnmm3/9/Hv37sVisbBixQp27tyZZ9nO33//zcCBA6latSre3t7ccMMN/Oc//yEjI8PuOhaLhUGDBjFr1ixq1aqF1Wrlo48+uuR7HjlyhA8++IDIyEi7IjrXLbfcwsiRI9m+fbvd0pjQ0FCioqJYtGgR9evXx8fHhxtuuIE33njDFvPdd99x++23A/Dwww/bPs/YsWOBSy/tyL3u119/TaNGjWxLTL7++mvAXGpRq1YtSpcuzR133JFnycUPP/zAgw8+SGhoKL6+voSGhtKzZ0/27dv3r99/fqWkpNC4cWNuvvlmfv31V6ddV6SkUyEtIi6XnZ3N+fPn7R7Z2dn5eu3o0aP5448/eO+993jvvfc4fPgwrVq1sluXPH/+fO69917KlSvHJ598wvvvv09qaiqtWrUiISEBgDp16lClShVWrFhhe92KFSvw9fVlx44dtgL0/PnzrFmzhnbt2uUrv5tvvpmWLVvywQcf2I598MEHhIaG0rZt2zzxZ86cISwsjI8++ojBgwfzzTffMHLkSGbPnk3nzp0xDAOAxMREfH196dChA4mJiSQmJvLmm2/aXWvAgAF4eXkxf/58Jk+ezHfffUefPn3sYp544glGjhxJeHg4S5Ys4cUXX2
T58uU0b96cv/76yxY3btw4W9zixYt54okniImJ4Zdffrni569SpQqJiYk0atSIG264wZbrbbfdxrlz52jdujUff/wxQ4cOZenSpfTp04fJkyfTrVu3PNdavHgxb731Fi+88ALffvstLVu2vOR7rl69mvPnz19xyUvuuQv/gQOQlJREbGwsQ4YMYdGiRTRv3pynn36aqVOnAnDbbbfx4YcfAvDcc8/ZPs+AAQOu+D38+OOPjBo1ipEjR/Lll1/i7+9Pt27dGDNmDO+99x4TJ05k3rx5pKWlERUVxdmzZ22v3bt3LzVr1mT69Ol8++23vPLKK6SkpHD77bfbjVFBJScn07RpU6xWK4mJidx8881XfU0Rt2GIiLjIhx9+aACXfHh6etrFVq9e3ejbt6/t+erVqw3AuO2224ycnBzb8b179xpeXl7GgAEDDMMwjOzsbCM4ONioV6+ekZ2dbYs7efKkUalSJaN58+a2Y3369DFuuOEG2/N27doZMTExRkBAgPHRRx8ZhmEY33//vQEYcXFxV/xsY8aMMQDj2LFjxocffmhYrVbj+PHjxvnz540qVaoYY8eONQzDMEqXLm33uSZNmmR4eHgYmzdvtrveF198YQDGsmXLbMcufu3F3+vAgQPtjk+ePNkAjJSUFMMwDGPnzp2XjNu4caMBGKNHjzYMwzBSU1MNHx8fo2vXrnZxud9FWFjYFb8LwzCMsLAwo06dOnbHZs2aZQDGZ599Znf8lVdeyfMdA4a/v7/x999//+t7vfzyywZgLF++/LIxZ8+eNQDjnnvusR2rXr26YbFYjKSkJLvY8PBwo1y5csbp06cNwzCMzZs3G4Dx4Ycf5rlu7rhfqHr16oavr69x8OBB27GkpCQDMKpUqWK7rmEYxuLFiw3AWLJkyWVzP3/+vHHq1CmjdOnSxuuvv247nvs7sXr16su+1jD++d/H5s2bjfj4eKNcuXJG9+7djbNnz17xdSKSl2akRcTlPv74YzZv3mz32LhxY75e26tXL7v/lF69enWaN2/O6tWrAfjll184fPgw0dHRdmuUy5Qpw3333ceGDRs4c+YMAG3btuWPP/5gz549nDt3joSEBNq3b0/r1q1tM5crVqzAarVy11135fvz3X///Xh7ezNv3jyWLVvGkSNHLtup4+uvv6Zu3bo0bNjQboY+MjIyXx0ZLtS5c2e75/Xr1wewLQnI/Y4uzuWOO+6gVq1arFy5EjBnv8+dO5dnfXPz5s2pXr16vvO52KpVqyhdujTdu3e3O56bT+7752rTpg0BAQEFfr9LuXgZRp06dWjQoIHdsV69epGens7WrVsL/D4NGzakatWqtue1atUCzK4nfn5+eY5fuGzj1KlTjBw5kptuuolSpUpRqlQpypQpw+nTp9m5c2eBc/roo4/o0KEDAwYM4LPPPsPHx6fA1xJxV7rZUERcrlatWgW+2TAoKOiSx3788UcAW5uzKlWq5IkLDg4mJyeH1NRU/Pz8bMs1VqxYQY0aNcjKyqJNmzb8+eeftq4bK1asoEWLFpe9ye1SSpcuzQMPPMAHH3xA9erVadeu3WUL0D///JPffvvtst08HPlP+RUrVrR7brVaAWzLBv7tu8kt5nLjLvddF9Tx48cJCgrKU8xWqlSJUqVK5WlRd6k8L+X6668HYM+ePZeNyT0XEhJid/xKn/FqWuZd3KXE29v7isfPnTtnO9arVy9WrlzJ888/z+23325rm9ihQwe7JSCOWrBgAb6+vgwYMMBlLftErnUqpEXkmnbkyJFLHsstInP/TElJyRN3+PBhPDw8bLOc1apV45ZbbmHFihWEhobSpEkTypcvT9u2bRk4cCAbN25kw4YNjBs3zuE8+/fvz3vvvcdPP/3EvHnzLhsXGBiIr6+v3Zrqi887y4XfzcWdJg4fPmx7r9y4y33XoaGhBX7/jRs3YhiGXSF39OhRzp8/n+ez5rfYa926NaVKlWLx4sU8/vjjl4zJvckwPDzc7v
jlPmNuvkUtLS2Nr7/+mjFjxvDss8/ajmdkZPD3339f1bXnzZvH888/T1hYGHFxcXla+YnIv9PSDhG5pn3yySe2G/DA/E/i69evt3WwqFmzJlWrVmX+/Pl2cadPn2bhwoW2Th652rVrx6pVq4iPj7cVWbfccgvXX389L7zwAllZWfm+0fBCzZo1o3///nTt2pWuXbteNi4qKorff/+dihUr0qRJkzyPC4tWq9V6VTOSbdq0AWDu3Ll2xzdv3szOnTttN0Peeeed+Pj45PkHwPr166+qc0Tbtm05depUnk1lPv74Y9v5gggKCqJ///58++23fPrpp3nO7969m1deeYU6derkuSFx+/bttv+akWv+/PmULVuW2267Dcg7s1+YLBYLhmHY3jPXe++9l+8bci+nQoUKrFixglq1atG6dWs2bNhwVdcTcUeakRYRl0tOTub8+fN5jt94441cd911V3zt0aNH6dq1KzExMaSlpTFmzBh8fHwYNWoUAB4eHkyePJnevXsTFRXFY489RkZGBlOmTOHEiRO8/PLLdtdr27Ytb775Jn/99RfTp0+3O/7hhx8SEBCQ79Z3F3v//ff/NSY2NpaFCxdy9913M2TIEOrXr09OTg779+8nLi6OYcOG0bRpUwDq1avHd999x1dffUWVKlUoW7asQxuM1KxZk0cffZQZM2bg4eHBPffcw969e3n++ecJCQlhyJAhAAQEBDB8+HBeeuklBgwYwP3338+BAwcYO3bsVS3teOihh/jvf/9L37592bt3L/Xq1SMhIYGJEyfSoUOHAv2DJde0adP45Zdf6NOnD2vXrqVTp05YrVY2bNjA1KlTKVu2LAsXLszTQzo4OJjOnTszduxYqlSpwty5c4mPj+eVV16x/YPrxhtvxNfXl3nz5lGrVi3KlClDcHAwwcHBBc73csqVK8fdd9/NlClTCAwMJDQ0lDVr1vD+++9Tvnz5q75+2bJlWb58Od26dbN1bmnduvXVJy7iLlx7r6OIuLMrde0AjHfffdcWe7muHXPmzDEGDx5sXHfddYbVajVatmxp/PDDD3nea/HixUbTpk0NHx8fo3Tp0kbbtm2N77//Pk9camqq4eHhYZQuXdrIzMy0HZ83b54BGN26dcvXZ7uwa8eVXKrzxqlTp4znnnvOqFmzpuHt7W34+/sb9erVM4YMGWIcOXLEFpeUlGS0aNHC8PPzs+uecWFXhgtdqqtDdna28corrxi33HKL4eXlZQQGBhp9+vQxDhw4YPfanJwcY9KkSUZISIjh7e1t1K9f3/jqq6+MsLCwAnftMAzDOH78uPH4448bVapUMUqVKmVUr17dGDVqlHHu3Dm7OMB48skn//V9LpSZmWn897//NZo2bWqUKVPGsFqtRs2aNY0RI0YYf/31V5746tWrGx07djS++OILo06dOoa3t7cRGhpqTJs2LU/sJ598Ytx6662Gl5eXARhjxowxDOPyXTs6duyY5xqX+kx79uwxAGPKlCm2YwcPHjTuu+8+IyAgwChbtqzRvn17Izk5+bK/E4507ciVkZFh3HfffYaPj4+xdOnSK75eRP5hMYwL/luniIiImwoNDaVu3bq2jVJERP6N1kiLiIiIiBSACmkRERERkQLQ0g4RERERkQLQjLSIiIiISAGokBYRERERKQAV0iIiIiIiBaANWYpYTk4Ohw8fpmzZsvne7lZEREREio5hGJw8eZLg4GA8PC4/76xCuogdPnyYkJAQV6chIiIiIv/iwIEDVKtW7bLnVUgXsbJlywLmwJQrV87F2ZRcWVlZxMXFERERgZeXl6vTkSKicXc/GnP3ozF3P64Y8/T0dEJCQmx12+WokC5iucs5ypUrp0K6EGVlZeHn50e5cuX0F60b0bi7H425+9GYux9Xjvm/LcPVzYYiIiIiIgWgQlpEREREpABUSIuIiIiIFIAKaRERERGRAlAhLSIiIiJSACqkRUREREQKQIW0iIiIiEgBqJAWERERES
kAFdIiIiIiIgWgQlpEREREpABUSIuIiIiIFIAKaRERERGRAlAhLSIiIiJSAKVcnYAUnuxsWLcOUlKgShVo2RI8PV2dlYiIiEjJoEK6hPryS3j6aTh48J9j1arB669Dt26uy0tERESkpNDSjhLoyy+he3f7Ihrg0CHz+JdfuiYvERERkZJEhXQJk51tzkQbRt5zucdiY804ERERESk4FdIlzLp1eWeiL2QYcOCAGSciIiIiBadCuoRJSXFunIiIiIhcmgrpEqZKFefGiYiIiMilqZAuYVq2NLtzWCyXj6lQwYwTERERkYJTIV3CeHqaLe7g8sX033/D9OlFlpKIiIhIiaRCugTq1g2++AKqVrU/HhICUVHmz8OHm4+cnKLPT0RERKQkUCFdQnXrBnv3wurVMH+++eeePbBkCUyebMa8+ir07QtZWS5NVUREROSapJ0NSzBPT2jVKu/xZ56BypWhf3+YOxeOHTNnsMuUKfIURURERK5ZmpF2Uw89ZM5O+/nBt99CmzZmQS0iIiIi+aNC2o116AArV5pdPDZvhhYtzOUgIiIiIvLvVEi7uTvvhO+/h+uvh19/hebN4aefXJ2ViIiISPGnQlq49VZYvx7q1jV3PGzZEtascXVWIiIiIsWbCmkBzFZ569aZRXR6OkRGwpdfujorERERkeJLhbTYlC9v3njYpQtkZED37vDWW67OSkRERKR4UiEtdnx9zVZ4jz4KhgEDB8KYMebPIiIiIvIPFdKSh6cnzJplFtAA48fD44/D+fOuzUtERESkOFEhLZdkscDYsebSDosF3nkH7r8fzp51dWYiIiIixYMKabmixx+Hzz8Hb29YvNi8CfHECVdnJSIiIuJ6KqTlX913H8TFQbly/3T2OHTI1VmJiIiIuJYKacmXsDCziK5SBZKTzY1bdu1ydVYiIiIirqNCWvKtfn1z45ZbboH9++Guu2DjRldnJSIiIuIaKqTFIaGhkJAAd9wBx49DmzawbJmrsxIREREpeiqkxWHXXQcrV5o3Hp45A507w0cfuTorERERkaKlQloKpEwZ+Oor6NMHsrOhXz+YPFkbt4iIiIj7UCEtBeblZc5EDx9uPh85EoYOhZwc1+YlIiIiUhRUSMtV8fCAKVNg6lTz+fTp5ix1ZqZL0xIREREpdCqkxSmGDYO5c6FUKfjkE4iKgpMnXZ2ViIiISOFRIS1O07s3fP01lC4N8fHQujUcPerqrEREREQKhwppcarISFi9GgIDYcsWaNEC/vjD1VmJiIiIOJ8KaXG622+H7783e07/9pu5C+K2ba7OSkRERMS5VEhLobjlFnMXxPr14c8/zS3GV61ydVYiIiIizqNCWgpNlSqwdq1ZRJ88CffcA5995uqsRERERJxDhbQUKn9/WL4c7rvPbIn34IMwc6arsxIRERG5eiqkpdD5+MCnn8LAgebOh089Bc89p10QRURE5NqmQlqKhKenORM9frz5fMIEiImB8+ddm5eIiIhIQamQliJjscDzz8M775g7Ir7/vrnk48wZV2cmIiIi4jgV0lLkYmJg4UJzyceSJRAeDn//7eqsRERERBzj0kJ67dq1dOrUieDgYCwWC4sXL7Y7f+rUKQYNGkS1atXw9fWlVq1avPXWW3YxGRkZPPXUUwQGBlK6dGk6d+7MwYMH7WJSU1OJjo7G398ff39/oqOjOXHihF3M/v376dSpE6VLlyYwMJDBgweTmZlpF/Pzzz8TFhaGr68vVatWZfz48Rha6FsgXbpAXByUL2+2yWvZEg4ccHVWIiIiIvnn0kL69OnTNGjQgJmXaeMwZMgQli9fzty5c9m5cydDhgzhqaee4n//+58tJjY2lkWLFrFgwQISEhI4deoUUVFRZGdn22J69epFUlISy5cvZ/ny5SQlJREdHW07n52dTceOHTl9+jQJCQksWLCAhQsXMmzYMFtMeno64eHhBAcHs3nzZmbMmMHUqVOZNm1aIXwz7qFlS1i3DoKDYc
cOc+OWHTtcnZWIiIhIPhnFBGAsWrTI7lidOnWM8ePH2x277bbbjOeee84wDMM4ceKE4eXlZSxYsMB2/tChQ4aHh4exfPlywzAMY8eOHQZgbNiwwRaTmJhoAMauXbsMwzCMZcuWGR4eHsahQ4dsMZ988olhtVqNtLQ0wzAM48033zT8/f2Nc+fO2WImTZpkBAcHGzk5Ofn+nGlpaQZgu64Yxt69hnHrrYYBhhEQYBjff3/118zMzDQWL15sZGZmXv3F5JqhcXc/GnP3ozF3P64Y8/zWa6VcWsX/i7vuuoslS5bQv39/goOD+e6779i9ezevv/46AFu2bCErK4uIiAjba4KDg6lbty7r168nMjKSxMRE/P39adq0qS3mzjvvxN/fn/Xr11OzZk0SExOpW7cuwcHBtpjIyEgyMjLYsmULrVu3JjExkbCwMKxWq13MqFGj2Lt3LzVq1LjkZ8jIyCAjI8P2PD09HYCsrCyysrKc80Vd44KDYfVq6NLFk40bPWjXzmDevGyiogq+bCb3u9V37F407u5HY+5+NObuxxVjnt/3KtaF9BtvvEFMTAzVqlWjVKlSeHh48N5773HXXXcBcOTIEby9vQkICLB7XeXKlTly5IgtplKlSnmuXalSJbuYypUr250PCAjA29vbLiY0NDTP++Seu1whPWnSJMaNG5fneFxcHH5+fv/2FbiVIUM8mTq1CT/8EET37h48+eSPtG27/6quGR8f76Ts5FqicXc/GnP3ozF3P0U55mfy2VKs2BfSGzZsYMmSJVSvXp21a9cycOBAqlSpQrt27S77OsMwsFgstucX/uzMGOP/bzS81GtzjRo1iqFDh9qep6enExISQkREBOXKlbvs69xVp07w+OM5zJnjwYwZjahUqT4jRuRwha/4krKysoiPjyc8PBwvL6/CSVaKHY27+9GYux+NuftxxZjnriD4N8W2kD579iyjR49m0aJFdOzYEYD69euTlJTE1KlTadeuHUFBQWRmZpKammo3K3306FGaN28OQFBQEH/++Wee6x87dsw2oxwUFMTGjRvtzqemppKVlWUXkzs7feH7AHlmsy9ktVrtloPk8vLy0l8Al+DlBR99BFWrwssvw/PPe3LsmCevvWb2nnb8evqe3ZHG3f1ozN2Pxtz9FOWY5/d9im0f6dw1xB4XVU+enp7k5OQA0LhxY7y8vOym+lNSUkhOTrYV0s2aNSMtLY1NmzbZYjZu3EhaWppdTHJyMikpKbaYuLg4rFYrjRs3tsWsXbvWriVeXFwcwcHBeZZ8yNWxWGDSJHjtNfP5G29Az55wwVJzEREREZdzaSF96tQpkpKSSEpKAmDPnj0kJSWxf/9+ypUrR1hYGM888wzfffcde/bsYfbs2Xz88cd07doVAH9/fx555BGGDRvGypUr2bZtG3369KFevXq2pR+1atWiffv2xMTEsGHDBjZs2EBMTAxRUVHUrFkTgIiICGrXrk10dDTbtm1j5cqVDB8+nJiYGNvyi169emG1WunXrx/JycksWrSIiRMnMnTo0Csu7ZCCi42F+fPNWerPPoMOHSCf/6VFREREpPAVQQeRy1q9erUB5Hn07dvXMAzDSElJMfr162cEBwcbPj4+Rs2aNY1XX33Vrt3c2bNnjUGDBhkVKlQwfH19jaioKGP//v1273P8+HGjd+/eRtmyZY2yZcsavXv3NlJTU+1i9u3bZ3Ts2NHw9fU1KlSoYAwaNMiu1Z1hGMZPP/1ktGzZ0rBarUZQUJAxduxYh1rfGYba3xVEXJxhlCljtsdr1MgwUlL+/TVqj+SeNO7uR2PufjTm7kft7y6jVatWV9wZMCgoiA8//PCK1/Dx8WHGjBnMmDHjsjEVKlRg7ty5V7zO9ddfz9dff33FmHr16rF27dorxojzhYfDd9+ZM9LbtkGLFvDtt3DTTa7OTERERNxZsV0jLXKhxo3h++/hhhvgjz/MYnrrVldnJSIiIu5MhbRcM266ySymGz
aEo0chLAxWrHB1ViIiIuKuVEjLNSUoCNasgTZt4NQpc7nHggWuzkpERETckQppueaUKwfLlkGPHpCVZbbGe+MNV2clIiIi7kaFtFyTrFb45BMYNMh8/vTTMGoUGAZkZ8OaNRbWrq3KmjUWsrNdm6uIiIiUTCqk5Zrl4WHORE+YYD5/+WVo2xaqV4fw8FJMm9aE8PBShIbCl1+6NFUREREpgVRIyzXNYoHRo+H9983CevVqOHTIPubQIejeXcW0iIiIOJcKaSkR+vaFgIBLn8ttVR4bi5Z5iIiIiNOokJYSYd06OH788ucNAw4cMONEREREnEGFtJQIKSnOjRMRERH5NyqkpUSoUsW5cSIiIiL/RoW0lAgtW0K1aubNh5disUBIiBknIiIi4gwqpKVE8PSE1183f75UMW0YMH26GSciIiLiDCqkpcTo1g2++AKqVs17rlw5s8e0iIiIiLOokJYSpVs32LsX4uPPM3ToDyxbdp5bboH0dBg/3tXZiYiISEmiQlpKHE9PCAszuPvuQ7RrZ9iWfLzxBuza5drcREREpORQIS0lXvv20KkTnD9vbsqSu0GLiIiIyNVQIS1uYdo08PaGb7+Fr792dTYiIiJSEqiQFrdw000wdKj585AhkJHh2nxERETk2qdCWtzG6NHmhiy//w6vvebqbERERORap0Ja3EbZsjB5svnzSy/B4cOuzUdERESubSqkxa307g3NmsHp0zBypKuzERERkWuZCmlxKxaL2QbPYoG5c2H9eldnJCIiItcqFdLidpo0gf79zZ8HD4acHNfmIyIiItcmFdLiliZONLcN37IFPvzQ1dmIiIjItUiFtLilSpVg7Fjz51Gj4MQJV2YjIiIi1yIV0uK2Bg2CW2+FY8dg/HhXZyMiIiLXGhXS4ra8vOD1182fZ8yAnTtdm4+IiIhcW1RIi1uLiIDOneH8eYiNBcNwdUYiIiJyrVAhLW5v2jTw9oa4OFiyxNXZiIiIyLVChbS4vRtvhGHDzJ+HDoVz51ybj4iIiFwbVEiLAKNHQ3Aw/PGHOUMtIiIi8m9KORKclpbGokWLWLduHXv37uXMmTNcd911NGrUiMjISJo3b15YeYoUqjJlYPJk6NPH7DHdty9UrerqrERERKQ4y9eMdEpKCjExMVSpUoXx48dz+vRpGjZsSNu2balWrRqrV68mPDyc2rVr8+mnnxZ2ziKFolcvaN4cTp+GkSNdnY2IiIgUd/makW7QoAEPPfQQmzZtom7dupeMOXv2LIsXL2batGkcOHCA4cOHOzVRkcJmsZht8Jo0gXnz4IknoEULV2clIiIixVW+Cunt27dz3XXXXTHG19eXnj170rNnT44dO+aU5ESK2m23wYAB8O67MHgwbNoEnp6uzkpERESKo3wt7fi3Ivpq40WKkwkTwN8ftm6FDz5wdTYiIiJSXDncteOjjz5i6dKltucjRoygfPnyNG/enH379jk1ORFXuO46GDfO/Hn0aDhxwqXpiIiISDHlcCE9ceJEfH19AUhMTGTmzJlMnjyZwMBAhgwZ4vQERVxh4ECoXRv++gvGjnV1NiIiIlIcOVxIHzhwgJtuugmAxYsX0717dx599FEmTZrEunXrnJ6giCt4ecH06ebPM2fCjh0uTUdERESKIYcL6TJlynD8+HEA4uLiaNeuHQA+Pj6cPXvWudmJuFB4OHTpAtnZ8PTTYBiuzkhERESKE4cL6fDwcAYMGMCAAQPYvXs3HTt2BMzOHqGhoc7OT8SlXn0VrFZYsQL+9z9XZyMiIiLFicOF9H//+1+aNWvGsWPHWLhwIRUrVgRgy5Yt9OzZ0+kJirjSDTdAbkv0oUPh3DnX5iMiIiLFh0NbhAOUL1+emTNn5jk+LrfNgUgJM2oUzJ4Ne/aYM9T/+Y+rMxIREZHiwOEZaYB169bRp08fmjdvzqFDhwCYM2cOCQkJTk1OpDgoXRqmTDF/njgRDh50bT4iIiJSPDhcSC9cuJDIyEh8fX
3ZunUrGRkZAJw8eZKJEyc6PUGR4uDBB+Guu+DMGRgxwtXZiIiISHHgcCH90ksvMWvWLN599128vLxsx5s3b87WrVudmpxIcWGxwBtvmH9+8gmo06OIiIg4XEj/8ssv3H333XmOlytXjhPaAk5KsEaNICbG/HnwYLMtnoiIiLgvhwvpKlWq8Ntvv+U5npCQwA033OCUpESKq5degvLlISkJ3n/f1dmIiIiIKzlcSD/22GM8/fTTbNy4EYvFwuHDh5k3bx7Dhw9n4MCBhZGjSLFx3XWQ26Bm9GhITXVtPiIiIuI6Dre/GzFiBGlpabRu3Zpz585x9913Y7VaGT58OIMGDSqMHEWKlSeegLffNrcNHzsWXn/d1RmJiIiIKzg0I52dnc2aNWsYNmwYf/31F5s2bWLDhg0cO3aMF198sbByFClWvLz+KZ7/+19ITnZtPiIiIuIaDhXSnp6eREZGkpaWhp+fH02aNOGOO+6gTJkyhZWfSLHUrh107WrecBgbC4bh6oxERESkqDm8RrpevXr88ccfhZGLyDXl1VfBaoWVK2HRIldnIyIiIkXN4UJ6woQJDB8+nK+//pqUlBTS09PtHo5Yu3YtnTp1Ijg4GIvFwuLFi/PE7Ny5k86dO+Pv70/ZsmW588472b9/v+18RkYGTz31FIGBgZQuXZrOnTtz8KKt51JTU4mOjsbf3x9/f3+io6PztOrbv38/nTp1onTp0gQGBjJ48GAyMzPtYn7++WfCwsLw9fWlatWqjB8/HkNTkW6rRg145hnz52HD4OxZ1+YjIiIiRcvhQrp9+/b8+OOPdO7cmWrVqhEQEEBAQADly5cnICDAoWudPn2aBg0aMHPmzEue//3337nrrru49dZb+e677/jxxx95/vnn8fHxscXExsayaNEiFixYQEJCAqdOnSIqKorsC5r89urVi6SkJJYvX87y5ctJSkoiOjradj47O5uOHTty+vRpEhISWLBgAQsXLmTYsGG2mPT0dMLDwwkODmbz5s3MmDGDqVOnMm3aNIc+s5Qszz4L1arB3r0wdaqrsxEREZGi5HDXjtWrVzvtze+55x7uueeey57/z3/+Q4cOHZg8ebLt2IW9qtPS0nj//feZM2cO7dq1A2Du3LmEhISwYsUKIiMj2blzJ8uXL2fDhg00bdoUgHfffZdmzZrxyy+/ULNmTeLi4tixYwcHDhwgODgYgFdffZV+/foxYcIEypUrx7x58zh37hyzZ8/GarVSt25ddu/ezbRp0xg6dCgWi8Vp34tcO0qXhilToGdPmDQJ+vWDkBBXZyUiIiJFweFCOiwsrDDyyCMnJ4elS5cyYsQIIiMj2bZtGzVq1GDUqFF06dIFgC1btpCVlUVERITtdcHBwdStW5f169cTGRlJYmIi/v7+tiIa4M4778Tf35/169dTs2ZNEhMTqVu3rq2IBoiMjCQjI4MtW7bQunVrEhMTCQsLw2q12sWMGjWKvXv3UqNGjUt+joyMDDIyMmzPc5e/ZGVlkZWV5ZTvSvLK/W6L4jvu1g3uusuThAQPhg3LYd48bXnoKkU57lI8aMzdj8bc/bhizPP7Xg4X0rnOnDnD/v3786wjrl+/fkEvaefo0aOcOnWKl19+mZdeeolXXnmF5cuX061bN1avXk1YWBhHjhzB29s7z5KSypUrc+TIEQCOHDlCpUqV8ly/UqVKdjGVK1e2Ox8QEIC3t7ddTGhoaJ73yT13uUJ60qRJjMvdweMCcXFx+Pn55eObkKsRHx9fJO/TrVs51q9vxeefe9Cw4Xrq1DleJO8rl1ZU4y7Fh8bc/WjM3U9RjvmZM2fyFedwIX3s2DEefvhhvvnmm0uev3Bt8tXIyckB4N5772XIkCEANGzYkPXr1zNr1qwrzowbhmG31OJSyy6cEZN7o+GVlnWMGjWKoUOH2p6np6cTEhJCREQE5cqVu+zr5OpkZWURHx9PeHg4Xl5eRfKeu3bl8M47nnz6aQs2bjyPp2eRvK
1cwBXjLq6lMXc/GnP344oxz28DDYcL6djYWFJTU9mwYQOtW7dm0aJF/Pnnn7z00ku8+uqrDid6OYGBgZQqVYratWvbHa9VqxYJCQkABAUFkZmZSWpqqt2s9NGjR2nevLkt5s8//8xz/WPHjtlmlIOCgti4caPd+dTUVLKysuxicmenL3wfIM9s9oWsVqvdcpBcXl5e+gugCBTl9zxhAnz2Gfz0k4XZs714/PEieVu5BP1+uR+NufvRmLufohzz/L6Pw107Vq1axWuvvcbtt9+Oh4cH1atXp0+fPkyePJlJkyY5nOjleHt7c/vtt/PLL7/YHd+9ezfVq1cHoHHjxnh5edlN9aekpJCcnGwrpJs1a0ZaWhqbNm2yxWzcuJG0tDS7mOTkZFJSUmwxcXFxWK1WGjdubItZu3at3VKWuLg4goOD8yz5EPcUGAjjx5s/P/cc/P23a/MRERGRwuVwIX369GnbmuMKFSpw7NgxwNyoZevWrQ5d69SpUyQlJZGUlATAnj17SEpKsvWJfuaZZ/j000959913+e2335g5cyZfffUVAwcOBMDf359HHnmEYcOGsXLlSrZt20afPn2oV6+erYtHrVq1aN++PTExMWzYsIENGzYQExNDVFQUNWvWBCAiIoLatWsTHR3Ntm3bWLlyJcOHDycmJsa2/KJXr15YrVb69etHcnIyixYtYuLEierYIXaeeALq1IHjx2HMGFdnIyIiIoXJ4UK6Zs2atlnihg0b8vbbb3Po0CFmzZpFlSpVHLrWDz/8QKNGjWjUqBEAQ4cOpVGjRrzwwgsAdO3alVmzZjF58mTq1avHe++9x8KFC7nrrrts13jttdfo0qULPXr0oEWLFvj5+fHVV1/hecEC1Xnz5lGvXj0iIiKIiIigfv36zJkzx3be09OTpUuX4uPjQ4sWLejRowddunRh6gWNgf39/YmPj+fgwYM0adKEgQMHMnToULv1zyKlSsEbb5g/v/UW/Pyza/MRERGRwlOgNdK5SyDGjBlDZGQk8+bNw9vbm9mzZzt0rVatWv3rzoD9+/enf//+lz3v4+PDjBkzmDFjxmVjKlSowNy5c6/4Ptdffz1ff/31FWPq1avH2rVrrxgj0qYN3HcfLFwITz9tbiGu/2ghIiJS8jhcSPfu3dv2c6NGjdi7dy+7du3i+uuvJzAw0KnJiVyrpk6FpUth9Wr48kuzsBYREZGSxeGlHRfz8/PjtttuUxEtcoHQUBgxwvx52DA4e9al6YiIiEghcHhG+krLLAA++OCDAicjUpKMHAkffgj79pnbiP//0n8REREpIRyekU5NTbV7HD16lFWrVvHll19y4sSJQkhR5Nrk52cu8QB4+WX4/2Y0IiIiUkI4PCO9aNGiPMdycnIYOHAgN9xwg1OSEikp7r8f3nwT1qyBZ56BTz91dUYiIiLiLFe9RhrAw8ODIUOG8NprrznjciIlhsUCr78OHh7mrofffefqjERERMRZnFJIA/z++++cP3/eWZcTKTEaNIDHHjN/fvpp0K+JiIhIyeDw0o6LNyAxDIOUlBSWLl1K3759nZaYSEny4ouwYAH89BO88w78/+acIiIicg1zuJDetm2b3XMPDw+uu+46Xn311X/t6CHiripWNIvpQYPg+efhgQfMYyIiInLtcriQXr16dWHkIVLiPfYYvP22uW34Cy/Af//r6oxERETkajhtjbSIXFmpUuaNhwCzZpnLPEREROTa5fCMdKNGjbBYLPmK3bp1q8MJiZRkrVtD9+7wxRfmjYerVpmdPUREROTa4/CMdPv27fn999+xWq20atWKVq1a4ePjw++//05ERAT33nuv7SEieU2dCj4+Ziu8L75wdTYiIiJSUA7PSB87dozBgwfz4osv2h0fM2YMBw4c0BbhIv+ienVz+/Bx42D4cOjY0dwFUURERK4tDs9If/755zz00EN5jvfp04eFCxc6JSmRkm7ECLj+enPb8MmTXZ2NiIiIFITDhbSvry
8JCQl5jickJODj4+OUpERKOj8/c4kHwCuvwL59rs1HREREHOfw0o7Y2FieeOIJtmzZwp133gnAhg0b+OCDD3jhhRecnqBISdW9O7RqZa6VHj4cPv/c1RmJiIiIIxwupJ999lluuOEGXn/9debPnw9ArVq1mD17Nj169HB6giIllcVitsNr1Mi86XD1arOrh4iIiFwbHC6kAXr06KGiWcQJ6teHxx+HN9+EwYNh2zaz37SIiIgUfw6vkT5w4AAHDx60Pd+0aROxsbG88847Tk1MxF2MHw8VKkBysrnzoYiIiFwbHC6ke/XqZdsm/MiRI7Rr145NmzYxevRoxo8f7/QERUq6ihUht5vk88/D8eOuzUdERETyx+FCOjk5mTvuuAOAzz77jHr16rF+/Xrmz5/P7NmznZ2fiFt49FFzmUdqqllMi4iISPHncCGdlZWF1WoFYMWKFXTu3BmAW2+9lZSUFOdmJ+ImSpWCN94wf377bfjxR9fmIyIiIv/O4UK6Tp06zJo1i3Xr1hEfH0/79u0BOHz4MBUrVnR6giLuIiwMevSAnBzzxkPDcHVGIiIiciUOF9KvvPIKb7/9Nq1ataJnz540aNAAgCVLltiWfIhIwUyZAr6+sHat+kqLiIgUdw432mrVqhV//fUX6enpBAQE2I4/+uij+Pn5OTU5EXdz/fXw7LMwZoy5SUtUlLkLooiIiBQ/Ds9IA3h6etoV0QChoaFUqlTJKUmJuLNnnoHq1eHAAXP7cBERESmeClRIi0jh8fWFV181f548GfbudWk6IiIichkqpEWKoW7dzO3Cz50zl3iIiIhI8aNCWqQYsljg9dfBwwMWLoRVq1ydkYiIiFwsX4V0hQoV+OuvvwDo378/J0+eLNSkRATq1YOBA82fBw+G8+ddm4+IiIjYy1chnZmZSXp6OgAfffQR586dK9SkRMQ0bpy5hfj27fDWW67ORkRERC6Ur/Z3zZo1o0uXLjRu3BjDMBg8eDC+vr6XjP3ggw+cmqCIO6tQAV56CZ54Al54AXr2hMBAV2clIiIikM8Z6blz59KhQwdOnTqFxWIhLS2N1NTUSz5ExLliYqBBAzhxAp57ztXZiIiISK58zUhXrlyZl19+GYAaNWowZ84cbQcuUkQ8PeGNN8wtxN95Bx57DBo1cnVWIiIi4nDXjj179qiIFilid98NDzwAhmHeeGgYrs5IRERECtT+bs2aNXTq1ImbbrqJm2++mc6dO7Nu3Tpn5yYiF5gyxdysJSEBPv3U1dmIiIiIw4X03LlzadeuHX5+fgwePJhBgwbh6+tL27ZtmT9/fmHkKCJASAiMGmX+PHw4nD7t2nxERETcncOF9IQJE5g8eTKffvopgwcP5umnn+bTTz/l5Zdf5sUXXyyMHEXk/w0fDqGhcOgQ/P9tCyIiIuIiDhfSf/zxB506dcpzvHPnzuzZs8cpSYnIpfn6wquvmj9PmQL6lRMREXEdhwvpkJAQVq5cmef4ypUrCQkJcUpSInJ5XbtCmzaQkQHDhrk6GxEREfeVr/Z3Fxo2bBiDBw8mKSmJ5s2bY7FYSEhIYPbs2bz++uuFkaOIXMBigddfh4YNYdEiWLEC2rVzdVYiIiLux+FC+oknniAoKIhXX32Vzz77DIBatWrx6aefcu+99zo9QRHJq25dGDgQZsyAp5+GpCTw8nJ1ViIiIu7F4UIaoGvXrnTt2tXZuYiIA8aNg/nzYccOeOsts7+0iIiIFJ0C9ZEWEdcLCIAJE8yfX3gBjh1zbT4iIiLuRoW0yDVswABzrXRaGjz3nKuzERERcS8qpEWuYZ6e8MYb5s/vvgtbt7o2HxEREXeiQlrkGteyJfTsCYYBTz0Fq1fDJ5/Ad99BdrarsxMRESm5VEiLlACTJ4O3N6xfb/aY7tULWrc2d0H88ktXZyciIlIyOdy1wzAMvvjiC1avXs3Ro0fJycmxO/+l/l9bpMht2gSZmXmPHzoE3bvDF19At25Fn5eIiEhJ5v
CM9NNPP010dDR79uyhTJky+Pv72z1EpGhlZ5u9pC/FMMw/Y2O1zENERMTZHJ6Rnjt3Ll9++SUdOnQojHxExEHr1sHBg5c/bxhw4IAZ16pVkaUlIiJS4jk8I+3v788NN9xQGLmISAGkpDg3TkRERPLH4UJ67NixjBs3jrNnz171m69du5ZOnToRHByMxWJh8eLFl4197LHHsFgsTJ8+3e54RkYGTz31FIGBgZQuXZrOnTtz8KLpudTUVKKjo23LT6Kjozlx4oRdzP79++nUqROlS5cmMDCQwYMHk3nRotOff/6ZsLAwfH19qVq1KuPHj8fI/W/nIi5SpYpz40RERCR/HC6k77//flJTU6lUqRL16tXjtttus3s44vTp0zRo0ICZM2deMW7x4sVs3LiR4ODgPOdiY2NZtGgRCxYsICEhgVOnThEVFUX2BQtCe/XqRVJSEsuXL2f58uUkJSURHR1tO5+dnU3Hjh05ffo0CQkJLFiwgIULFzJs2DBbTHp6OuHh4QQHB7N582ZmzJjB1KlTmTZtmkOfWcTZWraEatXAYrl8TGCgGSciIiLO4/Aa6X79+rFlyxb69OlD5cqVsVzp/73/xT333MM999xzxZhDhw4xaNAgvv32Wzp27Gh3Li0tjffff585c+bQrl07wFzDHRISwooVK4iMjGTnzp0sX76cDRs20LRpUwDeffddmjVrxi+//ELNmjWJi4tjx44dHDhwwFasv/rqq/Tr148JEyZQrlw55s2bx7lz55g9ezZWq5W6deuye/dupk2bxtChQ6/qexC5Gp6e8PrrZncOi+WfGwwvdOIExMXBv/y6iYiIiAMcLqSXLl3Kt99+y1133VUY+djJyckhOjqaZ555hjp16uQ5v2XLFrKysoiIiLAdCw4Opm7duqxfv57IyEgSExPx9/e3FdEAd955J/7+/qxfv56aNWuSmJhI3bp17Wa8IyMjycjIYMuWLbRu3ZrExETCwsKwWq12MaNGjWLv3r3UqFHjkp8hIyODjIwM2/P09HQAsrKyyMrKKviXI1eU+926y3fcqRMsWGBh6FBPDh365x911aoZBAcbbNrkQdeuBl9+mU14eMldjuRu4y4ac3ekMXc/rhjz/L6Xw4V0SEgI5cqVczihgnjllVcoVaoUgwcPvuT5I0eO4O3tTUBAgN3xypUrc+TIEVtMpUqV8ry2UqVKdjGVK1e2Ox8QEIC3t7ddTGhoaJ73yT13uUJ60qRJjBs3Ls/xuLg4/Pz8LvkacZ74+HhXp1BkrFZzu/AdOyqSmupDQMA5atc+jmFYmDLldjZurELXrhb+859NNGhwzNXpFip3Gncxaczdj8bc/RTlmJ85cyZfcQ4X0q+++iojRoxg1qxZeQpLZ9qyZQuvv/46W7dudXjZhGEYdq+51OudEZN7o+GV8hs1ahRDhw61PU9PTyckJISIiIgi+weJO8rKyiI+Pp7w8HC8vLxcnU6R6tQp77H27eGBB3JYutSTl19uxv/+l02rViVvZtqdx91daczdj8bc/bhizHNXEPwbhwvpPn36cObMGW688Ub8/PzyfKC///7b0Ute0rp16zh69CjXX3+97Vh2djbDhg1j+vTp7N27l6CgIDIzM0lNTbWblT569CjNmzcHICgoiD///DPP9Y8dO2abUQ4KCmLjxo1251NTU8nKyrKLyZ2dvvB9gDyz2ReyWq12y0FyeXl56S+AIqDv2eTlBQsXmrsbLltmoUuXUnzzDdx9t6szKxwad/ejMXc/GnP3U5Rjnt/3cbiQvrj9XGGJjo623UCYKzIykujoaB5++GEAGjdujJeXF/Hx8fTo0QOAlJQUkpOTmTx5MgDNmjUjLS2NTZs2cccddwCwceNG0tLSbMV2s2bNmDBhAikpKVT5/x5hcXFxWK1WGjdubIsZPXo0mZmZeHt722KCg4MLdWZexFmsVrOY7tIFvv0WOnSA5cuhCG53EBERKZEcLqT79u3rtDc/deoUv/32m+
35nj17SEpKokKFClx//fVUrFjRLt7Ly4ugoCBq1qwJmJvDPPLIIwwbNoyKFStSoUIFhg8fTr169WxFeK1atWjfvj0xMTG8/fbbADz66KNERUXZrhMREUHt2rWJjo5mypQp/P333wwfPpyYmBjb8otevXoxbtw4+vXrx+jRo/n111+ZOHEiL7zwgjp2yDXDxwcWLYLOnWHFCrOLR1wcNGvm6sxERESuPQ4X0vv377/i+QuXYvybH374gdatW9ue564l7tu3L7Nnz87XNV577TVKlSpFjx49OHv2LG3btmX27Nl4enraYubNm8fgwYNt3T06d+5s17va09OTpUuXMnDgQFq0aIGvry+9evVi6tSpthh/f3/i4+N58sknadKkCQEBAQwdOtRu/bPItcDXF/73P3Mt9apVEBkJ8fFwQWMbERERyQeHC+nQ0NArzsBeuBHKv2nVqpVDOwPu3bs3zzEfHx9mzJjBjBkzLvu6ChUqMHfu3Cte+/rrr+frr7++Yky9evVYu3ZtvnIVKc78/GDJEujYEdasMYvpFSugSRNXZyYiInLtcLiQ3rZtm93zrKwstm3bxrRp05gwYYLTEhORwlW6NHz9tbm8IyEBwsNh5UpwcINSERERt+VwId2gQYM8x5o0aUJwcDBTpkyhW7duTklMRApfmTKwbJnZHm/9emjXzlzu0bChqzMTEREp/jycdaFbbrmFzZs3O+tyIlJEypaFb74x10inpprF9E8/uTorERGR4s/hQjo9Pd3ukZaWxq5du3j++ee5+eabCyNHESlk5cqZLfFuvx2OH4e2bSE52dVZiYiIFG8OL+0oX778JXf4CwkJYcGCBU5LTESKlr+/WUy3awdbt5rF9HffQa1ars5MRESkeHK4kF69erXdcw8PD6677jpuuukmSpVy+HIiUowEBJit8Nq2haQkaNPGLKb/v+W6iIiIXMDhyjcsLKww8hCRYqJCBbMVXps25lrp1q3NFnlauSUiImLP4TXSH330EUuXLrU9HzFiBOXLl6d58+bs27fPqcmJiGtUrGgW03XrQkqKWUz//rursxIRESleHC6kJ06ciK+vLwCJiYnMnDmTyZMnExgYyJAhQ5yeoIi4xnXXmX2la9eGQ4fMYvqPP1ydlYiISPHhcCF94MABbrrpJgAWL15M9+7defTRR5k0aRLr1q1zeoIi4jqVKpnFdM2acOCAWUxfYoNRERERt+RwIV2mTBmOHz8OQFxcHO3atQPMrbrPnj3r3OxExOWCgsxNWm6+GfbvN9dO79/v6qxERERcz+FCOjw8nAEDBjBgwAB2795Nx44dAdi+fTuhoaHOzk9EioHgYFi9Gm68EfbsMWemDx50dVYiIiKu5XAh/d///pdmzZpx7NgxFi5cSMWKFQHYsmULPXv2dHqCIlI8VK1qFtM1aphrpVu3hsOHXZ2ViIiI6xRoQ5aZM2fmOT5u3DinJCQixVdIiFlMt2oFv/1mFtPffQdVqrg6MxERkaLn8Iy0iLi36tXNYvr662H3bnPzlj//dHVWIiIiRU+FtIg4LDTULKarVYOdO81i+uhRV2clIiJStFRIi0iB3HCDWUwHB8P27dCuHfz1l6uzEhERKToOFdKGYbBv3z61uRMRAG66ySymg4Lg55/NYvrvv12dlYiISNFwuJC++eabOai+VyLy/265xSymK1eGH380i+nUVFdnJSIiUvgcKqQ9PDy4+eabbRuyiIgA3HqruWnLddfBtm0QEQEnTrg6KxERkcLl8BrpyZMn88wzz5CcnFwY+YjINap2bbOYDgyEH36AyEhIS3N1ViIiIoXH4T7Sffr04cyZMzRo0ABvb298fX3tzv+tBZIibqtuXVixwtxGfNMmuOce+PZbKFvW1ZmJiIg4n8OF9PTp0wshDREpKRo0MIvptm0hMdEsppcvhzJlXJ2ZiIiIczlcSPft27cw8hCREqRRI4iPN4vp77+Hjh1h2TIoXdrVmYmIiDhPgfpI//
777zz33HP07NmTo/+/C8Py5cvZvn27U5MTkWtX48ZmMV2uHKxdC506wZkzrs5KRETEeRwupNesWUO9evXYuHEjX375JadOnQLgp59+YsyYMU5PUESuXbff/s8a6dWroXNnUBt6EREpKRwupJ999lleeukl4uPj8fb2th1v3bo1iYmJTk1ORK59d975zxrplSuhSxc4d87VWYmIiFw9hwvpn3/+ma5du+Y5ft1116m/tIhcUvPm/6yRjouDbt0gI8PVWYmIiFwdhwvp8uXLk5KSkuf4tm3bqFq1qlOSEpGSp2VLWLoUfH3hm2/gvvtUTIuIyLXN4UK6V69ejBw5kiNHjmCxWMjJyeH7779n+PDhPPTQQ4WRo4iUEGFh8PXX4ONjFtUPPACZma7OSkREpGAcLqQnTJjA9ddfT9WqVTl16hS1a9fm7rvvpnnz5jz33HOFkaOIlCBt2sCSJWC1wv/+Bw8+CFlZrs5KRETEcQ4X0l5eXsybN49ff/2Vzz77jLlz57Jr1y7mzJmDp6dnYeQoIiVMeLhZRHt7w6JF0KsXnD/v6qxEREQc43AhPX78eM6cOcMNN9xA9+7d6dGjBzfffDNnz55l/PjxhZGjiJRAkZFmEe3lBV98AdHRKqZFROTa4nAhPW7cOFvv6AudOXOGcePGOSUpEXEPHTrAwoVmMb1gAfTtC9nZrs5KREQkfxwupA3DwGKx5Dn+448/UqFCBackJSLuo1Mn+OwzKFUK5s+Hhx9WMS0iIteGUvkNDAgIwGKxYLFYuOWWW+yK6ezsbE6dOsXjjz9eKEmKSMnWpYs5I/3AAzBnjllUv/ceeDj8T30REZGik+9Cevr06RiGQf/+/Rk3bhz+/v62c97e3oSGhtKsWbNCSVJESr777jNnpHv1gg8/BE9PePttFdMiIlJ85buQ7tu3LwA1atSgRYsWlCqV75eKiORLjx7mso4+fcwZaU9PeOstuMRqMhEREZdzeK6nbNmy7Ny50/b8f//7H126dGH06NFkamcFEblKPXvCRx+ZxfPbb8NTT4FhuDorERGRvBwupB977DF2794NwB9//MEDDzyAn58fn3/+OSNGjHB6giLifvr0MZd3WCzw3/9CbKyKaRERKX4cLqR3795Nw4YNAfj8888JCwtj/vz5zJ49m4ULFzo7PxFxU337mss7AN54A4YNUzEtIiLFS4Ha3+Xk5ACwYsUKOnToAEBISAh//fWXc7MTEbfWvz+8847582uvwciRKqZFRKT4cLiQbtKkCS+99BJz5sxhzZo1dOzYEYA9e/ZQuXJlpycoIu4tJsa84RBgyhT4z39UTIuISPHgcCE9ffp0tm7dyqBBg/jPf/7DTTfdBMAXX3xB8+bNnZ6giMjjj8OMGebPkybBmDGuzUdERAQcaH+Xq379+vz88895jk+ZMgVPT0+nJCUicrFBg8zWeLGx8OKL5qYtL7zg6qxERMSdOa0ZtI+Pj7MuJSJySU8/bRbTw4aZs9KenuZSDxEREVdwuJD28PCw2x78YtnZ2VeVkIjIlQwdCufPmzcePvecWUw/+6xZYK9ZY2Ht2qqULm2hdWvznIiISGFxuJBetGiR3fOsrCy2bdvGRx99xLhx45yWmIjI5YwYYRbT//kPjBoFO3fCqlVw8GApoAnTpkG1avD669Ctm6uzFRGRksrhQvree+/Nc6x79+7UqVOHTz/9lEceecQpiYmIXMno0eYs9AsvwMcf5z1/6BB07w5ffKFiWkRECofDXTsup2nTpqxYscJZlxMR+VejR0O5cpc+l9siLzbWLLhFRESczSmF9NmzZ5kxYwbVqlVzxuVERPJl3TpIT7/8ecOAAwfMOBEREWdzeGlHQECA3c2GhmFw8uRJ/Pz8mDt3rlOTExG5kpQU58aJiIg4wuFC+rXXXrMrpD08PLjuuuto2rQpAQEBTk1ORORKqlRxbpyIiIgjHC6k+/XrVwhpiIg4rmVLszvHoUOX3za8TBm4446izU
tERNxDvtZI//TTT/l+OGLt2rV06tSJ4OBgLBYLixcvtp3Lyspi5MiR1KtXj9KlSxMcHMxDDz3E4cOH7a6RkZHBU089RWBgIKVLl6Zz584cPHjQLiY1NZXo6Gj8/f3x9/cnOjqaEydO2MXs37+fTp06Ubp0aQIDAxk8eDCZmZl2MT///DNhYWH4+vpStWpVxo8fj3G5//cWkULn6Wm2uAO4XHv7U6egRQvYtavo8hIREfeQrxnphg0bYrFY/rVotFgsDm3Icvr0aRo0aMDDDz/MfffdZ3fuzJkzbN26leeff54GDRqQmppKbGwsnTt35ocffrDFxcbG8tVXX7FgwQIqVqzIsGHDiIqKYsuWLbYty3v16sXBgwdZvnw5AI8++ijR0dF89dVXgLmJTMeOHbnuuutISEjg+PHj9O3bF8MwmDFjBgDp6emEh4fTunVrNm/ezO7du+nXrx+lS5dm2LBh+f7MIuJc3bqZLe6efhou/Dd0SAhER8M770BSEjRuDDNmwMMPX77oFhERcYiRD3v37s33o6AAY9GiRVeM2bRpkwEY+/btMwzDME6cOGF4eXkZCxYssMUcOnTI8PDwMJYvX24YhmHs2LHDAIwNGzbYYhITEw3A2LVrl2EYhrFs2TLDw8PDOHTokC3mk08+MaxWq5GWlmYYhmG8+eabhr+/v3Hu3DlbzKRJk4zg4GAjJycn358zLS3NAGzXlcKRmZlpLF682MjMzHR1KlJEzp83jPj4LGPo0M1GfHyWcf68efzwYcNo29YwzMUfhvHAA4Zx4oRrcxXn0e+6+9GYux9XjHl+67V8zUhXr1698Cp5B6SlpWGxWChfvjwAW7ZsISsri4iICFtMcHAwdevWZf369URGRpKYmIi/vz9Nmza1xdx55534+/uzfv16atasSWJiInXr1iU4ONgWExkZSUZGBlu2bKF169YkJiYSFhaG1Wq1ixk1ahR79+6lRo0al8w5IyODjIwM2/P0/+/VlZWVRVZWllO+F8kr97vVd+xemjfP4vTpQzRvXpucHIOcHAgMhKVLYepUD8aM8eDTTy1s3GgwZ042TZtqada1Tr/r7kdj7n5cMeb5fS+HbzacNGkSlStXpn///nbHP/jgA44dO8bIkSMdvWS+nDt3jmeffZZevXpR7v93YDhy5Aje3t55uoVUrlyZI0eO2GIqVaqU53qVKlWyi6lcubLd+YCAALy9ve1iQkND87xP7rnLFdKTJk265NbpcXFx+Pn5/dvHlqsUHx/v6hTEBS417nXrwsSJAUyb1pi9e0sTFuZBr1676Nr1V/5/FZhcw/S77n405u6nKMf8zJkz+YpzuJB+++23mT9/fp7jderU4cEHHyyUQjorK4sHH3yQnJwc3nzzzX+NNwzDrkWf5RILIp0RY/z/mvFLvTbXqFGjGDp0qO15eno6ISEhRERE2P5BIM6XlZVFfHw84eHheHl5uTodKSL/Nu4dOkD//vDkkzl89pkHc+fW5tChW/nww2wu+A9Scg3R77r70Zi7H1eMefqVdvu6gMOF9JEjR6hyiaas1113HSmFsOtBVlYWPXr0YM+ePaxatcqu+AwKCiIzM5PU1FS7WemjR4/SvHlzW8yff/6Z57rHjh2zzSgHBQWxceNGu/OpqalkZWXZxeTOTl/4PkCe2ewLWa1Wu+Uguby8vPQXQBHQ9+yerjTugYGwYAHccw88+SSsXu1B48YezJ4NUVFFm6c4j37X3Y/G3P0U5Zjn930c3iI8JCSE77//Ps/x77//3m6NsTPkFtG//vorK1asoGLFinbnGzdujJeXl91Uf0pKCsnJybZCulmzZqSlpbFp0yZbzMaNG0lLS7OLSU5OtvuHQFxcHFarlcaNG9ti1q5da9cSLy4ujuDg4DxLPkSkeLNYoF8/2LoVGjaE48ehUyez88cFtzSIiIhckcOF9IABA4iNjeXDDz9k37597Nu3jw8++IAhQ4YQExPj0LVOnTpFUlISSUlJAO
zZs4ekpCT279/P+fPn6d69Oz/88APz5s0jOzubI0eOcOTIEVsx6+/vzyOPPMKwYcNYuXIl27Zto0+fPtSrV4927doBUKtWLdq3b09MTAwbNmxgw4YNxMTEEBUVRc2aNQGIiIigdu3aREdHs23bNlauXMnw4cOJiYmxzYD36tULq9VKv379SE5OZtGiRUycOJGhQ4decWmHiBRfNWvChg0QG2s+f+MNuPNO9ZwWEZF8crQdSE5OjjFixAjDx8fH8PDwMDw8PAw/Pz9j3LhxDrcWWb16tQHkefTt29fYs2fPJc8BxurVq23XOHv2rDFo0CCjQoUKhq+vrxEVFWXs37/f7n2OHz9u9O7d2yhbtqxRtmxZo3fv3kZqaqpdzL59+4yOHTsavr6+RoUKFYxBgwbZtbozDMP46aefjJYtWxpWq9UICgoyxo4d61DrO8NQ+7uiovZI7ulqxv3rrw0jMNBskefnZxjvv28YDv56iwvod939aMzdT3Fuf2cxjIJtzXfq1Cl27tyJr68vN9988yXXAUte6enp+Pv7k5aWppsNC1FWVhbLli2jQ4cOWkPnRq523FNSzE1cVq40nz/wALz9Nvj7OzlRcRr9rrsfjbn7ccWY57dec3hpR64yZcrw22+/UaNGDRXRIlIiVKkCcXEwaZK5/finn5prqDdscHVmIiJSHBW4kAZ47LHHLtkRQ0TkWuXhAc8+CwkJUKMG7N0Ld91lFtfZ2a7OTkREipOrKqQLuCpERKTYu/NO2LYNHnzQLKBHj4aICDh82NWZiYhIcXFVhbSISEnm7w/z58OHH4KfH6xaBfXrw9dfuzozEREpDq6qkP7mm2+c3jtaRKQ4Uc9pERG5nKsqpO+66y58fHyclYuISLGlntMiInIxhwvpP//8k+joaIKDgylVqhSenp52DxGRkspqhddeM5d2BAZCUhI0bgwffAC6ZURExP2UcvQF/fr1Y//+/Tz//PNUqVJFu/qJiNvp2BF++umfntOPPGK2zVPPaRER9+JwIZ2QkMC6deto2LBhIaQjInJtyO05PXkyPPec2XN640b45BNzyYeIiJR8Di/tCAkJUds7ERHUc1pExN05XEhPnz6dZ599lr179xZCOiIi1x71nBYRcU8OF9IPPPAA3333HTfeeCNly5alQoUKdg8REXekntMiIu7H4TXS06dPL4Q0RESufbk9p5s1M2enk5LMntODB5trqa1WV2coIiLO5HAh3bdv38LIQ0SkxMjtOf3sszB9utlzeu1a80bEW291dXYiIuIs+VrakZ6ebvfzlR4iIqKe0yIi7iBfhXRAQABHjx4FoHz58gQEBOR55B4XEZF/5PacbtsWzpwxe0737Alpaa7OTERErla+lnasWrXKdiPh6tWrCzUhEZGSRj2nRURKpnwV0mFhYZf8WURE8ie353SrVtCrF+zZY/acfvFFGDECPD1dnaGIiDjK4ZsNAU6cOMGmTZs4evQoOTk5duceeughpyQmIlIS5facfvxxWLDA7Dm9YgXMmQPBwa7OTkREHOFwIf3VV1/Ru3dvTp8+TdmyZbFYLLZzFotFhbSIyL/I7TkdGQlPPvlPz+nZsyEqytXZiYhIfjm8IcuwYcPo378/J0+e5MSJE6Smptoef//9d2HkKCJS4uT2nN66FRo2hOPHzZ7TTz8NGRmuzk5ERPLD4UL60KFDDB48GD8/v8LIR0TEreT2nH76afP5G2+Yyz927XJtXiIi8u8cLqQjIyP54YcfCiMXERG3ZLWaG7d89ZV6TouIXEvytUZ6yZIltp87duzIM888w44dO6hXrx5eXl52sZ07d3ZuhiIibiIqCn78EaKjzXXTjzxits17+21zXbWIiBQv+Sqku3TpkufY+PHj8xyzWCxkZ2dfdVIiIu4qONgsnqdMUc9pEZHiLl9LO3JycvL1UBEtInL1PD3NntMJCRAaCnv3mj2nJ00C/TUrIlJ8OLxG+uOPPybjEreUZ2Zm8vHHHz
slKRERMWegk5LggQfMAnr0aIiIgMOHXZ2ZiIhAAQrphx9+mLS0tDzHT548ycMPP+yUpERExOTvby7r+OAD8PP7p+f011+b57Oz4bvvzJjvvtOMtYhIUXK4kDYMw24TllwHDx7EX3fDiIg4ncUCDz+ct+d0x45QvTq0bm1uO966tbkU5MsvXZ2xiIh7yPfOho0aNcJisWCxWGjbti2lSv3z0uzsbPbs2UP79u0LJUkREfmn5/TIkfD667BsWd6YQ4ege3f44gvo1q3ocxQRcSf5LqRzO3ckJSURGRlJmTJlbOe8vb0JDQ3lvvvuc3qCIiLyD6sVXn0V5s41Z6YvZhjmDHZsLNx7r3njooiIFI58F9JjxowBIDQ0lAceeAAfH59CS0pERC5v3bpLF9G5DAMOHDDjWrUqsrRERNxOvgvpXH379i2MPEREJJ9SUvIXp+4eIiKFK1+FdIUKFdi9ezeBgYEEBARc8mbDXH///bfTkhMRkbyqVMlf3KRJ5gYvmpUWESkc+SqkX3vtNcqWLWv7+UqFtIiIFK6WLaFaNfPGQsO4fFxystnJo00bePFFaN686HIUEXEH+Sqk+/bty5kzZwDo169fYeYjIiL/wtPT7NrRvbt5Y+GFxXTuPMfbb5ububz7rtl7etUquOces6Bu3NglaYuIlDj57iNdvnx5WrZsyQsvvMB33313yd0NRUSkaHTrZra4q1rV/ni1aubxmBj473/h119hwACz+P7mG2jSBLp2hZ9+ck3eIiIlSb4L6ffff5+aNWsyf/582rRpQ0BAAG3atOHFF18kISGBrKyswsxTREQu0q0b7N0Lq1fD/Pnmn3v22PePrl7dnJXetQuio80Z68WLoUEDc+vxnTtdlb2IyLUv34V0dHQ07733Hr/99hv79+9n1qxZ1KhRgw8//JCwsDACAgKIjIwszFxFROQinp7mzYQ9e5p/Xq5v9E03wccfw/bt0KOHeeyzz6BuXXjoIfj996LKWESk5HB4i3CAatWq8dBDD/H+++/z7bffMnr0aDw9PVmxYoWz8xMRESeqVQs+/dRcP33vvZCTA3PmmLsmxsTAvn2uzlBE5NrhcCH9xx9/8P777xMdHU1ISAiNGzdm8+bNjBw5krVr1xZGjiIi4mQNGphLPDZvNm9CzM6G996Dm2+GJ59UD2oRkfzI94Ysffv2ZfXq1Zw8eZIWLVpw9913M2jQIJo0aYKn9qAVEbkmNWkCy5bB+vXw/PNmd48334QPPoAnnoBnn4VKlVydpYhI8ZTvGek5c+bg4eHB6NGjGT9+PM888wxNmzZVES0iUgI0bw4rV5qFdIsWcO4cvPYa1KgBo0ZdeUtyERF3le9CeseOHTz77LNs2bKFjh07UqFCBTp16sTUqVP54YcfyMnJKcw8RUSkCLRuDevWwfLlcPvtcOYMvPyyWVCPGQNpaa7OUESk+Mh3IX3rrbfy+OOPs2DBAlJSUvj+++/p0KEDmzZtolOnTlSoUIGoqKjCzFVERIqAxQKRkbBxI/zvf+Z66pMnYfx4s6CeOBFOnXJ1liIirlegrh0AtWvXpmvXrnTr1o3OnTtjGAbffPONM3MTEREXsligc2fYuhU+/9zs+JGaCv/5j1lQv/qqOWMtIuKuHCqkjx49ymeffcYTTzxBrVq1qFq1Kg8//DC7du1iyJAhrFq1qrDyFBERF/HwMLcj//lnmDvX7En9118wfDjceCPMmAHa7FZE3FG+u3bUrl2bX375hVKlSnH77bdz33330bp1a1q0aIGPj09h5igiIsWApyf07m3uiPjxx+ZSj337YPBgmDIFnnsOHn4YvLxcnamISNHI94z0vffeyzfffENqaioJCQm89NJLtG3bVkW0iIibKVUK+veH3bvhrbegalU4cAAee8zc2OWjj+D8eVdnKSJS+PJdSE+aNImIiAj8/PwKMx8REblGeHvD44/Db7/B9OlQuTLs2QP9+kGdOvDJJ+bOiSIiJVW+CumXX3
6ZM/m8o2Tjxo0sXbr0qpISEZFrh48PPP00/P47vPIKVKxozlb36mV2/PjySzAMV2cpIuJ8+Sqkd+zYwfXXX88TTzzBN998w7Fjx2znzp8/z08//cSbb75J8+bNefDBBylXrlyhJSwiIsVT6dIwYgT88Qe8+CL4+0NyMtx3n7mD4tKlKqhFpGTJVyH98ccfs2rVKnJycujduzdBQUF4e3tTtmxZrFYrjRo14oMPPqBfv37s2rWLli1bFnbeIiJSTJUrZ954uGeP+WeZMmYLvagocwfFFStUUItIyZDvNdL169fn7bff5vjx42zdupXPP/+cd999l2+//ZY///yTH374gUcffRSr1ZrvN1+7di2dOnUiODgYi8XC4sWL7c4bhsHYsWMJDg7G19eXVq1asX37druYjIwMnnrqKQIDAyldujSdO3fm4MGDdjGpqalER0fj7++Pv78/0dHRnDhxwi5m//79dOrUidKlSxMYGMjgwYPJzMy0i/n5558JCwvD19eXqlWrMn78eAz9v4GIyCUFBJgz03v2wDPPgK8vbNgA4eHQqhWsXevqDEVEro7DG7JYLBYaNGjAvffey4MPPki7du0IDAws0JufPn2aBg0aMHPmzEuenzx5MtOmTWPmzJls3ryZoKAgwsPDOXnypC0mNjaWRYsWsWDBAhISEjh16hRRUVFkZ2fbYnr16kVSUhLLly9n+fLlJCUlER0dbTufnZ1Nx44dOX36NAkJCSxYsICFCxcybNgwW0x6ejrh4eEEBwezefNmZsyYwdSpU5k2bVqBPruIiLsIDITJk80lH4MHmzcprl0LYWEQEWHuoCgick0yignAWLRoke15Tk6OERQUZLz88su2Y+fOnTP8/f2NWbNmGYZhGCdOnDC8vLyMBQsW2GIOHTpkeHh4GMuXLzcMwzB27NhhAMaGDRtsMYmJiQZg7Nq1yzAMw1i2bJnh4eFhHDp0yBbzySefGFar1UhLSzMMwzDefPNNw9/f3zh37pwtZtKkSUZwcLCRk5OT78+ZlpZmALbrSuHIzMw0Fi9ebGRmZro6FSlCGvdrw4EDhvH444ZRqpRhmIs8DCMqyjC2bnX8Whpz96Mxdz+uGPP81mv53pClqO3Zs4cjR44QERFhO2a1WgkLC2P9+vU89thjbNmyhaysLLuY4OBg6taty/r164mMjCQxMRF/f3+aNm1qi7nzzjvx9/dn/fr11KxZk8TEROrWrUtwcLAtJjIykoyMDLZs2ULr1q1JTEwkLCzMbulKZGQko0aNYu/evdSoUeOSnyMjI4OMC7b8Sk9PByArK4usrKyr/6LkknK/W33H7kXjfm2oXBneeAOGDIGJEz2ZO9fC119b+Ppr6NIlhxdeyKZu3fxdS2PufjTm7scVY57f9yq2hfSRI0cAqFy5st3xypUrs2/fPluMt7c3AQEBeWJyX3/kyBEqVaqU5/qVKlWyi7n4fQICAvD29raLCQ0NzfM+uecuV0hPmjSJcePG5TkeFxenntxFID4+3tUpiAto3K8dXbvCHXeU5tNPa7JuXTUWL/bgf/+zcNddh3jwwV1UrXo6X9fRmLsfjbn7Kcoxz2/b52JbSOeyWCx2zw3DyHPsYhfHXCreGTHG/99oeKV8Ro0axdChQ23P09PTCQkJISIiQm0CC1FWVhbx8fGEh4fjpf2K3YbG/doVEwPbt5/nxRc9+fJLD9atq8b331elTx+D//wnm8vMVWjM3ZDG3P24YsxzVxD8m2JbSAcFBQHmbG+VKlVsx48ePWqbCQ4KCiIzM5PU1FS7WemjR4/SvHlzW8yff/6Z5/rHjh2zu87Gi+52SU1NJSsryy4md3b6wveBvLPmF7JarZfsZOLl5aW/AIqAvmf3pHG/NjVsCAsXQlISvPACfPWVhY8/tjB/vgf9+5ut9EJC/onPzob16y2sXVuV0qW9ad26FJ6erspeipp+z91PUY55ft/H4a4dAJs3b2bEiBE8+OCDdOvWze7hLD
Vq1CAoKMhuGj8zM5M1a9bYiuTGjRvj5eVlF5OSkkJycrItplmzZqSlpbFp0yZbzMaNG0lLS7OLSU5OJiUlxRYTFxeH1WqlcePGtpi1a9fatcSLi4sjODg4z5IPEREpuIYNYckSs5tHRAScPw/vvAM33WR2/UhJMXdLDA2F8PBSTJvWhPDwUoSGmsdFRIqKw4X0ggULaNGiBTt27GDRokVkZWWxY8cOVq1ahb+/v0PXOnXqFElJSSQlJQHmDYZJSUns378fi8VCbGwsEydOZNGiRSQnJ9OvXz/8/Pzo1asXAP7+/jzyyCMMGzaMlStXsm3bNvr06UO9evVo164dALVq1aJ9+/bExMSwYcMGNmzYQExMDFFRUdSsWROAiIgIateuTXR0NNu2bWPlypUMHz6cmJgY2/KLXr16YbVa6devH8nJySxatIiJEycydOjQf11qIiIijrvjDvj2W1i3zmyVl5kJM2aYBfR998FFWwZw6BB0765iWkSKkKPtQOrVq2fMnDnTMAzDKFOmjPH7778bOTk5RkxMjPHCCy84dK3Vq1cbQJ5H3759DcMwW+CNGTPGCAoKMqxWq3H33XcbP//8s901zp49awwaNMioUKGC4evra0RFRRn79++3izl+/LjRu3dvo2zZskbZsmWN3r17G6mpqXYx+/btMzp27Gj4+voaFSpUMAYNGmTX6s4wDOOnn34yWrZsaVitViMoKMgYO3asQ63vDEPt74qK2iO5J417yZWTYxgrVhjGnXf+0zLvUg+LxTBCQgzj/HlXZyyFRb/n7qc4t7+zGIZjW/OVLl2a7du3ExoaSmBgIKtXr6ZevXrs3LmTNm3a2C2PkLzS09Px9/cnLS1NNxsWoqysLJYtW0aHDh20hs6NaNxLvtWroU2b/MW1alXo6YgL6Pfc/bhizPNbrzm8tKNChQq2nQWrVq1KcnIyACdOnMh3qxAREZGCuOie78vSnI6IFAWHu3a0bNmS+Ph46tWrR48ePXj66adZtWoV8fHxtG3btjByFBERAeCCJk5OiRMRuRoOF9IzZ87k3LlzgNkj2cvLi4SEBLp168bzzz/v9ARFRERytWwJ1aqZNxZebmGipyd4FKgnlYiIYwq0tCN3K20PDw9GjBjBkiVLmDZtWp4dBkVERJzJ0xNef938+eKGSbnPs7PNddSTJ0NOTtHmJyLuJV+F9IW7u6Snp1/xISIiUpi6dYMvvoCqVe2PV6sGc+dCz55mMT1yJHTuDMePuyZPESn58rW0IyAggJSUFCpVqkT58uWvuJ12dna205MUERG5ULducO+9sHr1eb75Jol77mlo29mwVy+zY8fgwbB0KTRqBJ99Bnfe6eqsRaSkyVchvWrVKipUqADA6tWrCzUhERGR/PD0hLAwg9OnDxEW1sC2PbjFAo8+am7ocv/98Ntv5trqV16BIUPyLgkRESmofBXSYWFhtp9r1KhBSEhInllpwzA4cOCAc7MTEREpoIYNYcsWGDAAPv8chg2DtWvhww9Bt/SIiDM4fLNhjRo1OHbsWJ7jf//9NzVq1HBKUiIiIs5Qrhx8+in897/g7Q3/+x/cdhts3uzqzESkJHC4kM5dC32xU6dO4ePj45SkREREnMVigYEDYf16uOEG2LsXWrSAN964fAs9EZH8yHcf6aFDhwJgsVh4/vnn8fPzs53Lzs5m48aNNGzY0OkJioiIOEPjxuZSj0cegS+/hKefhjVr4P33oXx5V2cnIteifBfS27ZtA8wZ6Z9//hlvb2/bOW9vbxo0aMDw4cOdn6GIiIiTlC9vts6bMQOGDzcL6qQkcw31bbe5OjsRudbku5DO7dbRr18/ZsyYQdmyZQstKRERkcJisZit8e68E3r0gD/+gGbN4LXX4Ikn1NVDRPLPoTXS58+fZ+7cuezbt6+w8hERESkSd9wB27aZ/agzM+HJJ+HBB0F7i4lIfjlUSJcqVYrq1atr0xURESkRAgJg0SJ49VUoVcrcuKVJE3
O5h4jIv3G4a8dzzz3HqFGj+PvvvwsjHxERkSJlscDQoWaP6ZAQ+PVXc9nH22+rq4eIXFm+10jneuONN/jtt98IDg6mevXqlC5d2u781q1bnZaciIhIUWnWzFzq0bevubX444+bxfXbb0OZMq7OTkSKI4cL6S5duhRCGiIiIq5XsSIsWQJTp8Lo0TB/vtky7/PPoV49V2cnIsWNw4X0mDFjCiMPERGRYsHDA0aMgObNzZsPf/nFvDFx5kzo319dPUTkHw6vkQY4ceIE7733nt1a6a1bt3Lo0CGnJiciIuIqd91lLvVo3x7OnYMBA8xlH6dPuzozESkuHC6kf/rpJ2655RZeeeUVpk6dyokTJwBYtGgRo0aNcnZ+IiIiLnPddeZ66YkTzZnqOXPg9tth+3ZXZyYixYHDhfTQoUPp168fv/76Kz4+Prbj99xzD2vXrnVqciIiIq7m4QGjRsGqVVClCuzcaS71+OgjV2cmIq7mcCG9efNmHnvssTzHq1atypEjR5ySlIiISHETFmb2lw4PhzNnoF8/c830mTOuzkxEXMXhQtrHx4f0S2z79Msvv3Ddddc5JSkREZHiqFIl+OYbGD/evOnwww+haVPYtcvVmYmIKzhcSN97772MHz+erKwsACwWC/v37+fZZ5/lvvvuc3qCIiIixYmnJzz/PKxYAZUrQ3KyuRvivHmuzkxEiprDhfTUqVM5duwYlSpV4uzZs4SFhXHTTTdRtmxZJkyYUBg5ioiIFDtt2phLPVq3Njt59OkDjz4KZ8+6OjMRKSoO95EuV64cCQkJrFq1iq1bt5KTk8Ntt91Gu3btCiM/ERGRYisoCOLjzaUeL74I774LmzbBZ5/BLbe4OjsRKWwOF9K52rRpQ5s2bZyZi4iIyDXH0xPGjTP7TvfuDT/+CI0bw3vvwQMPuDo7ESlMBSqkV65cycqVKzl69Cg5OTl25z744AOnJCYiInItCQ83l3r07Alr15q7Iq5ZA9OmwQXdYkWkBHF4jfS4ceOIiIhg5cqV/PXXX6Smpto9RERE3FVwMKxcCaNHm8/fesvcavz3312bl4gUDodnpGfNmsXs2bOJjo4ujHxERESuaaVKwYQJ0LKleQPitm1w223wwQeg5lYiJYvDM9KZmZk0b968MHIREREpMdq3N5d6tGgB6enQvTsMHgwZGa7OTEScxeFCesCAAcyfP78wchERESlRqlWD1athxAjz+YwZ5k2Je/a4Ni8RcQ6Hl3acO3eOd955hxUrVlC/fn28vLzszk+bNs1pyYmIiFzrvLzglVfMpR59+8IPP0CjRjB7NnTp4ursRORqOFxI//TTTzRs2BCA5ORku3MWi8UpSYmIiJQ0UVHmeukHHoANG6BrVxgyBF5+Gby9XZ2diBSEw4X06tWrCyMPERGREu/6682WeKNGmW3xXnsN1q+HTz+F6tVdnZ2IOMrhNdIiIiJScN7e8OqrsHgxlC8PGzeaSz2++srVmYmIo/I9I92tW7d8xX355ZcFTkZERMRd3HuvudSjRw/YvBk6d4bhw2HiRHNdtYgUf/kupP39/QszDxEREbcTGgoJCWZXj9dfh6lTzaUeCxZASIirsxORf5PvQvrDDz8szDxERETckrc3TJ8Od98N/fubhXSjRjBnDtxzj6uzE5Er0RppERGRYqBbN9i61dwF8fhx6NDBvCnx/HlXZyYil6NCWkREpJi44QZzRvrJJ83nL78MbdrAoUOuzUtELk2FtIiISDFitcLMmWZLvLJlYd06c6lHXJx5PjsbvvsOPvnE/DM725XZirg3FdIiIiLFUI8esGULNGwIx45B+/Zw//3mDYqtW0OvXuafoaGghlkirqFCWkREpJi6+WZITITHHgPDgC++gIMH7WMOHYLu3VVMi7iCCmkREZFizMcH/vtfqFDh0ucNw/wzNlbLPESKmgppERGRYm7dOvj778ufNww4cMCME5Gio0JaRESkmEtJyV/cgQOFm4eI2FMhLS
IiUsxVqZK/uNhYmDDBvDlRRAqfCmkREZFirmVLqFYNLJbLx3h4mMs/nnvO3F58wAD4+eeiy1HEHamQFhERKeY8PeH1182fLy6mLRbz8ckn5rbijRtDRga8/z7Urw9t28KSJboRUaQwqJAWERG5BnTrZra/q1rV/ni1aubxHj2gTx/YvBkSEsye0x4esGoV3Hsv3HKLWYynp7smf5GSSIW0iIjINaJbN9i7F1avhvnzzT/37DGP57JYoEUL+Owz89yIEVC+PPzxh7mGulo188/ff3fNZxApSVRIi4iIXEM8PaFVK+jZ0/zT0/PysddfD6+8Ym7i8tZbcOutcPKkOTN9883QubM5Y53bi1pEHFOsC+nz58/z3HPPUaNGDXx9fbnhhhsYP348OTk5thjDMBg7dizBwcH4+vrSqlUrtm/fbnedjIwMnnrqKQIDAyldujSdO3fm4EVbQ6WmphIdHY2/vz/+/v5ER0dz4sQJu5j9+/fTqVMnSpcuTWBgIIMHDyYzM7PQPr+IiIgzlC4Njz8O27fD8uVwzz1m8fzVV+Ya6gYNzDXVZ8+6OlORa0uxLqRfeeUVZs2axcyZM9m5cyeTJ09mypQpzJgxwxYzefJkpk2bxsyZM9m8eTNBQUGEh4dz8uRJW0xsbCyLFi1iwYIFJCQkcOrUKaKiosi+4M6LXr16kZSUxPLly1m+fDlJSUlER0fbzmdnZ9OxY0dOnz5NQkICCxYsYOHChQwbNqxovgwREZGr5OEBkZGwbBns3AkDB4Kfn9ndY8AAs9vHc8/B4cOuzlTkGmEUYx07djT69+9vd6xbt25Gnz59DMMwjJycHCMoKMh4+eWXbefPnTtn+Pv7G7NmzTIMwzBOnDhheHl5GQsWLLDFHDp0yPDw8DCWL19uGIZh7NixwwCMDRs22GISExMNwNi1a5dhGIaxbNkyw8PDwzh06JAt5pNPPjGsVquRlpaW78+UlpZmAA69RhyXmZlpLF682MjMzHR1KlKENO7uR2N+9f7+2zCmTDGM6tUNw5ynNoxSpQyjVy/D2LjR1dnlpTF3P64Y8/zWa6VcW8Zf2V133cWsWbPYvXs3t9xyCz/++CMJCQlMnz4dgD179nDkyBEiIiJsr7FarYSFhbF+/Xoee+wxtmzZQlZWll1McHAwdevWZf369URGRpKYmIi/vz9Nmza1xdx55534+/uzfv16atasSWJiInXr1iU4ONgWExkZSUZGBlu2bKF169aX/AwZGRlkZGTYnqf//+3SWVlZZGVlOeV7krxyv1t9x+5F4+5+NOZXr0wZePppePJJWLLEwsyZHiQkeDB/vnlD45135jBoUA5duxp4ebk6W425O3LFmOf3vYp1IT1y5EjS0tK49dZb8fT0JDs7mwkTJtCzZ08Ajhw5AkDlypXtXle5cmX27dtni/H29iYgICBPTO7rjxw5QqVKlfK8f6VKlexiLn6fgIAAvL29bTGXMmnSJMaNG5fneFxcHH5+flf8/HL14uPjXZ2CuIDG3f1ozJ3DxweGD4euXf35+usbWLeuGhs2eLBhgwcVK56lQ4c9hIfvpVw51xexGnP3U5RjfubMmXzFFetC+tNPP2Xu3LnMnz+fOnXqkJSURGxsLMHBwfTt29cWZ7moO71hGHmOXezimEvFFyTmYqNGjWLo0KG25+np6YSEhBAREUG5cuWumKMUXFZWFvHx8YSHh+NVHKZQpEho3N2PxrzwPPUUHDmSzTvvGLzzjgdHj/oyZ05tvviiFr175/DkkznUqVP0eWnM3Y8rxjw9nw3Xi3Uh/cwzz/Dss8/y4IMPAlCvXj327dvHpEmT6Nu3L0FBQYA5W1ylShXb644ePWqbPQ4KCiIzM5PU1FS7WemjR4/SvHlzW8yff/6Z5/2PHTtmd52NGzfanU9NTSUrKyvPTPWFrFYrVqs1z3EvLy/9BVAE9D27J427+9GYF46QEHjxRfMGxAULYPp0SEqy8N57nrz3nifh4eaykH
vuMW9kLEoac/dTlGOe3/cp1l07zpw5g8dFv5menp629nc1atQgKCjIbqo/MzOTNWvW2Irkxo0b4+XlZReTkpJCcnKyLaZZs2akpaWxadMmW8zGjRtJS0uzi0lOTiYlJcUWExcXh9VqpXHjxk7+5CIiIsWH1Qp9+8LWrbBmjbkBjIcHxMdDVJTZn3rmTLNHtYg7KdaFdKdOnZgwYQJLly5l7969LFq0iGnTptG1a1fAXGoRGxvLxIkTWbRoEcnJyfTr1w8/Pz969eoFgL+/P4888gjDhg1j5cqVbNu2jT59+lCvXj3atWsHQK1atWjfvj0xMTFs2LCBDRs2EBMTQ1RUFDVr1gQgIiKC2rVrEx0dzbZt21i5ciXDhw8nJiZGSzRERMQtWCxw992wcKG5M+KwYeDvD7/+ai4FqVbNPLZnj6szFSkaxbqQnjFjBt27d2fgwIHUqlWL4cOH89hjj/Hiiy/aYkaMGEFsbCwDBw6kSZMmHDp0iLi4OMqWLWuLee211+jSpQs9evSgRYsW+Pn58dVXX+F5wXZQ8+bNo169ekRERBAREUH9+vWZM2eO7bynpydLly7Fx8eHFi1a0KNHD7p06cLUqVOL5ssQEREpRkJDYepUc9fEmTPhllsgPR2mTYObbjJnrdes0a6JUrJZDEP/Ey9K6enp+Pv7k5aWppnsQpSVlcWyZcvo0KGD1tC5EY27+9GYFx85Oeauia+/DnFx/xxv0ABiY+HBB82uIFdLY+5+XDHm+a3XivWMtIiIiFwbPDygQwf49ltzK/LHHgNfX/jxR3j4Ybj+ehgzBq7QMVbkmqNCWkRERJyqdm2YNctc9vHyy+ba6WPHYPx4s6B+6CHYssXVWYpcPRXSIiIiUigqVICRI+GPP+DTT6F5c8jKgjlzoEkTaNkSvvgCzp93daYiBaNCWkRERAqVlxf06AHffw+bNkHv3lCqFCQkwP33w403wpQpkJrq6kxFHKNCWkRERIrM7bfD3Lmwb5+50UtgIOzfDyNGmEtABg6EXbsu/drsbFizxsLatVVZs8ZCdnbR5i5yMRXSIiIiUuSCg81dEw8cgPffh/r14cwZeOstqFXL3C1x+XKzGwjAl1+aLffCw0sxbVoTwsNLERpqHhdxFRXSIiIi4jI+PtC/PyQlwapVcO+95sYvy5ebxXTt2vDoo9C9u3nz4oUOHTKPq5gWV1EhLSIiIi5nsUDr1rB4sblTYmwslC0Lv/wC77576Y1dco/FxqJlHuISKqRFRESkWLnxRnjtNXMGetCgK8cahrk8ZN26oslN5EKlXJ2AiIiIyKWUK2e2zJs5899jH3gAmjaFOnX+edx6q7kpjEhhUSEtIiIixVaVKvmLO3oUvvrKfOTy8IAbbrAvruvUgZo1nbNduYgKaRERESm2WrY02+IdOnTpddIWi1lsf/SR2TZv+/Z/Hn//Db/9Zj7+979/XuPhATfdZBbVtWvbF9hWa9F9Nrn2qZAWERGRYsvTE15/3ezOYbHYF9MWi/nnjBnQrp35yGUY8Oef9oV17uPECdi923wsWmT/XrkF9oWPW24Bb+8i+bhyjVEhLSIiIsVat27mVuJPP23fAq9aNZg+3Tx/MYsFgoLMR9u2/xw3DDhy5NIFdlqa2SXkl1/sW+qVKgU332xfXNeubRbYXl6F9rHlGqBCWkRERIq9bt3MHtOrV5/nm2+SuOeehrRuXQpPT8euk7sUpEqVvDPYhw/bF9Y7dph/pqfDzp3m44sv/nlNqVJmMX3xDPZNN6nAdhcqpEVEROSa4OkJYWEGp08fIiysgcNF9JVYLFC1qvmIiPjnuGGY67Mvnr3esQNOnjT/3LEDPv/8n9d4eZnrrS8usG+80Sy+CyI722zxl5Ji/iOgZUuc+vmlYFRIi4iIiFyGxWIuIalWDSIj/zme27/64uJ6xw44dQqSk83Hhby9L19gX6ko/vLLSy9ref31Sy9rkaKjQlpERETEQRYLXH+9+bjnnn+O5+
TkLbBzi+wzZ+Dnn83HhaxWs+f1xQV2jRpmt5Hu3fN2LMndHv2LL1RMu5IKaREREREn8fCA6tXNR4cO/xzPyYF9++zXXuf+fPYs/Pij+biQ1Wq+7nLbo1ss5vbo996rZR6uokJaREREpJB5eJgzzDVqQFTUP8dzcmDv3rwz2Dt3wrlzV77mhdujt2pVmNnL5aiQFhEREXGR3N0Xb7gBOnX653h2NrzxBgwd+u/XSEkpvPzkyjxcnYCIiIiI2PP0hEaN8hc7b565e6MUPRXSIiIiIsVQ7vbouTs4Xs7SpWY3kN69897IKIVLhbSIiIhIMZS7PTrkLaYtFvMxYYJ5U2NODsyfD/XrmzcfbtpU9Pm6IxXSIiIiIsVU7vboVavaH69WzTw+erQ5I711K9x/v1lcL1kCTZuaOzeuWnXprh/iHCqkRURERIqxbt3Mzh6rV5uzzqtXw5499v2jGzWCzz4zu308/LC5g+LKldC2LTRvDl99pYK6MKiQFhERESnmPD3NFnc9e5p/Xq5vdM2a8MEH5s2HgwaBjw9s2ACdO0ODBrBggdkRRJxDhbSIiIhICVO9OsyYYc5kjxwJZcuaNyL27Gnuovj++5CZ6eosr30qpEVERERKqMqV4eWXzV0VX3wRKlY0Z6sHDIAbbzRvZjx92tVZXrtUSIuIiIiUcAEB8Nxz5gz1tGkQHAwHD5pbjIeGwsSJcOKEa3O8FqmQFhEREXETZcrAkCHwxx/w9tvmjop//QX/+Y+5HOQ//4GjR12d5bVDhbSIiIiIm7Fa4dFH4ZdfzJ0R69SB9HRzZjo0FJ5+Gg4ccHWWxZ8KaRERERE3VaoU9OoFP/0EixfD7bfD2bPwxhvmGuoBA+DXX12dZfGlQlpERETEzXl4mDsibtwI8fHQujVkZZndPW691ez28dNPrs6y+FEhLSIiIiKAuTNi7o6I69dDVJS5/fiCBWYf6k6dIDHR1VkWHyqkRURERCSPZs3MHRGTkuCBB8wi++uvzZ0S27SBFSu0W6IKaRERERG5rNwdEXftgkceAS8vc5vy8HC480743//MWWt3pEJaRERERP7VLbfAe+/B77/D4MHg6wubNkGXLmaxPX8+nD/v6iyLlgppEREREcm3kBBzR8S9e2HUKChXDpKToXdvqFkT3nkHMjJcnWXRUCEtIiIiIg6rVMnsO71vH0yYAIGB5kYvjz1mbvTy2mslf/txFdIiIiIiUmDly8Po0eYM9fTpULUqHD4MQ4eauyW+9BKkpro4yUKiQlpERERErlrp0uaOiL//Du++a27ocvw4PP+8WVA/+yz8+aers3QuFdIiIiIi4jRWq7kj4q5d8MknUK8enDwJr7xibj/+1FOwf7+rs3QOFdIiIiIi4nSlSsGDD5p9qJcsgaZN4dw5mDnTnK3u3x9++cXVWV4dFdIiIiIiUmg8PP7ZEXHlSmjb1myT9+GHUKsW9OhhFtvXIhXSIiIiIlLoLJZ/dkRMTITOnc2dET//HBo1go4dzW3JL5SdDWvWWFi7tipr1ljIznZN7pejQlpEREREilTujog//QQ9e5qz1suWQYsW0KoVxMXBwoXmmurw8FJMm9aE8PBShIbCl1+6OPkLqJAWEREREZeoV8/cEfGXXyAmxtx+fM0aiIyE7t3h4EH7+EOHzOPFpZhWIS0iIiIiLnXTTeaOiH/8YW4/fjmGYf4ZG0uxWOahQlpEREREioVq1aBr1yvHGAYcOADr1hVNTleiQlpEREREio2UFOfGFSYV0iIiIiJSbFSp4ty4wqRCWkRERESKjZYtzSUeFsulz1ssEBJixrlasS+kDx06RJ8+fahYsSJ+fn40bNiQLVu22M4bhsHYsWMJDg7G19eXVq1asX37drtrZGRk8NRTTxEYGEjp0qXp3LkzBy+6DTQ1NZXo6Gj8/f3x9/cnOjqaEydO2MXs37+fTp06Ubp0aQIDAxk8eDCZmZmF9tlFRERE3I2nJ7z+uvnzxcV07vPp08
04VyvWhXRqaiotWrTAy8uLb775hh07dvDqq69Svnx5W8zkyZOZNm0aM2fOZPPmzQQFBREeHs7JkydtMbGxsSxatIgFCxaQkJDAqVOniIqKIvuC2z179epFUlISy5cvZ/ny5SQlJREdHW07n52dTceOHTl9+jQJCQksWLCAhQsXMmzYsCL5LkRERETcRbdu8MUXULWq/fFq1czj3bq5Jq88jGJs5MiRxl133XXZ8zk5OUZQUJDx8ssv246dO3fO8Pf3N2bNmmUYhmGcOHHC8PLyMhYsWGCLOXTokOHh4WEsX77cMAzD2LFjhwEYGzZssMUkJiYagLFr1y7DMAxj2bJlhoeHh3Ho0CFbzCeffGJYrVYjLS0t358pLS3NABx6jTguMzPTWLx4sZGZmenqVKQIadzdj8bc/WjM3cv584YRH59lDB262YiPzzLOny+a981vvVbKtWX8lS1ZsoTIyEjuv/9+1qxZQ9WqVRk4cCAxMTEA7NmzhyNHjhAREWF7jdVqJSwsjPXr1/PYY4+xZcsWsrKy7GKCg4OpW7cu69evJzIyksTERPz9/WnatKkt5s4778Tf35/169dTs2ZNEhMTqVu3LsHBwbaYyMhIMjIy2LJlC61bt77kZ8jIyCAjI8P2PD09HYCsrCyysrKc80VJHrnfrb5j96Jxdz8ac/ejMXc/zZtncfr0IZo3r01OjkFOTuG/Z37/91WsC+k//viDt956i6FDhzJ69Gg2bdrE4MGDsVqtPPTQQxw5cgSAypUr272ucuXK7Nu3D4AjR47g7e1NQEBAnpjc1x85coRKlSrlef9KlSrZxVz8PgEBAXh7e9tiLmXSpEmMGzcuz/G4uDj8/Pz+7SuQqxQfH+/qFMQFNO7uR2PufjTm7qcox/zMmTP5iivWhXROTg5NmjRh4sSJADRq1Ijt27fz1ltv8dBDD9niLBetRDcMI8+xi10cc6n4gsRcbNSoUQwdOtT2PD09nZCQECIiIihXrtwVc5SCy8rKIj4+nvDwcLy8vFydjhQRjbv70Zi7H425+3HFmOeuIPg3xbqQrlKlCrVr17Y7VqtWLRYuXAhAUFAQYM4WV7mgmeDRo0dts8dBQUFkZmaSmppqNyt99OhRmjdvbov5888/87z/sWPH7K6zceNGu/OpqalkZWXlmam+kNVqxWq15jnu5eWlvwCKgL5n96Rxdz8ac/ejMXc/RTnm+X2fYt21o0WLFvzyyy92x3bv3k316tUBqFGjBkFBQXZT/ZmZmaxZs8ZWJDdu3BgvLy+7mJSUFJKTk20xzZo1Iy0tjU2bNtliNm7cSFpaml1McnIyKRdsoxMXF4fVaqVx48ZO/uQiIiIiUtwV6xnpIUOG0Lx5cyZOnEiPHj3YtGkT77zzDu+88w5gLrWIjY1l4sSJ3Hzzzdx8881MnDgRPz8/evXqBYC/vz+PPPIIw4YNo2LFilSoUIHhw4dTr1492rVrB5iz3O3btycmJoa3334bgEcffZSoqChq1qwJQEREBLVr1yY6OpopU6bw999/M3z4cGJiYrREQ0RERMQNFetC+vbbb2fRokWMGjWK8ePHU6NGDaZPn07v3r1tMSNGjODs2bMMHDiQ1NRUmjZtSlxcHGXLlrXFvPbaa5QqVYoePXpw9uxZ2rZty+zZs/G8oJP3vHnzGDx4sK27R+fOnZk5c6btvKenJ0uXLmXgwIG0aNECX19fevXqxdSpU4vgmxARERGR4qZYF9IAUVFRREVFXfa8xWJh7NixjB079rIxPj4+zJgxgxkzZlw2pkKFCsydO/eKuVx//fV8/fXX/5qziIiIiJR8xXqNtIiIiIhIcaVCWkRERESkAFRIi4iIiIgUgAppEREREZECUCEtIiIiIlIAKqRFRERERAqg2Le/K2kMwwDyv4e7FExWVhZnzpwhPT1dW8i6EY27+9GYux+NuftxxZjn1mm5ddvlqJAuYidPngQgJCTExZmIiIiIyJWcPHkSf3//y563GP
9WaotT5eTkcPjwYcqWLYvFYnF1OiVWeno6ISEhHDhwQFu4uxGNu/vRmLsfjbn7ccWYG4bByZMnCQ4OxsPj8iuhNSNdxDw8PKhWrZqr03Ab5cqV01+0bkjj7n405u5HY+5+inrMrzQTnUs3G4qIiIiIFIAKaRERERGRAlAhLSWS1WplzJgxWK1WV6ciRUjj7n405u5HY+5+ivOY62ZDEREREZEC0Iy0iIiIiEgBqJAWERERESkAFdIiIiIiIgWgQlpEREREpABUSEuJMmnSJG6//XbKli1LpUqV6NKlC7/88our05IiNGnSJCwWC7Gxsa5ORQrRoUOH6NOnDxUrVsTPz4+GDRuyZcsWV6clhej8+fM899xz1KhRA19fX2644QbGjx9PTk6Oq1MTJ1m7di2dOnUiODgYi8XC4sWL7c4bhsHYsWMJDg7G19eXVq1asX37dtck+/9USEuJsmbNGp588kk2bNhAfHw858+fJyIigtOnT7s6NSkCmzdv5p133qF+/fquTkUKUWpqKi1atMDLy4tvvvmGHTt28Oqrr1K+fHlXpyaF6JVXXmHWrFnMnDmTnTt3MnnyZKZMmcKMGTNcnZo4yenTp2nQoAEzZ8685PnJkyczbdo0Zs6cyebNmwkKCiI8PJyTJ08Wcab/UPs7KdGOHTtGpUqVWLNmDXfffber05FCdOrUKW677TbefPNNXnrpJRo2bMj06dNdnZYUgmeffZbvv/+edevWuToVKUJRUVFUrlyZ999/33bsvvvuw8/Pjzlz5rgwMykMFouFRYsW0aVLF8CcjQ4ODiY2NpaRI0cCkJGRQeXKlXnllVd47LHHXJKnZqSlREtLSwOgQoUKLs5ECtuTTz5Jx44dadeunatTkUK2ZMkSmjRpwv3330+lSpVo1KgR7777rqvTkkJ21113sXLlSnbv3g3Ajz/+SEJCAh06dHBxZlIU9uzZw5EjR4iIiLAds1qthIWFsX79epflVcpl7yxSyAzDYOjQodx1113UrVvX1elIIVqwYAFbt25l8+bNrk5FisAff/zBW2+9xdChQxk9ejSbNm1i8ODBWK1WHnroIVenJ4Vk5MiRpKWlceutt+Lp6Ul2djYTJvxfe/ceFGXVxwH8u6JyE0EuLhKJpKCABCSlLSkaKt0kqxFDJkQmzAkKdcPABgQbkLUyHaVGSyFzFp1GUXOMoXHbULyAxIqhoGzEWGKkMl7CQOC8fzTu27q7gvsqq7zfz8zO8Jzb8zvPMPDbw9lDDmJiYiwdGvWBCxcuAACkUqleuVQqRVNTkyVCAsBEmvqx5ORk1NTU4NChQ5YOhe6jc+fOISUlBaWlpbCxsbF0ONQHuru7ERoaitzcXABASEgIamtr8fnnnzOR7sd27NiBbdu2QalUIiAgABqNBosXL4aHhwfmz59v6fCoj0gkEr1rIYRBWV9iIk390jvvvIO9e/eirKwMnp6elg6H7qOqqiq0tLRgwoQJurKuri6UlZVhw4YNaG9vh5WVlQUjpHttxIgR8Pf31yvz8/PDzp07LRQR9YXU1FSkpaXh9ddfBwAEBgaiqakJq1atYiL9f8Dd3R3APyvTI0aM0JW3tLQYrFL3Je6Rpn5FCIHk5GTs2rULKpUK3t7elg6J7rOIiAicPHkSGo1G9woNDUVsbCw0Gg2T6H4oLCzM4FjLM2fOwMvLy0IRUV9oa2vDgAH6aYuVlRWPv/s/4e3tDXd3d3z//fe6so6ODvz444+QyWQWi4sr0tSvJCUlQalUYs+ePXBwcNDtqXJ0dIStra2Fo6P7wcHBwWAPvL29PVxcXLg3vp9asmQJZDIZcnNzER0djYqKCmzatAmbNm2ydGh0H82aNQs5OTkYOXIkAgICUF1djTVr1iAhIcHSodE9cv36dTQ0NOiuGxsbodFo4OzsjJEjR2Lx4sXIzc2Fj48PfHx8kJubCzs7O8ybN89iMfP4O+pXTO2TKigoQHx8fN8GQxYzdepUHn/Xz+3btw
/p6ek4e/YsvL29sXTpUiQmJlo6LLqPrl27hoyMDBQXF6OlpQUeHh6IiYlBZmYmBg8ebOnw6B5Qq9WYNm2aQfn8+fNRWFgIIQSys7OxceNGtLa2YuLEicjPz7foogkTaSIiIiIiM3CPNBERERGRGZhIExERERGZgYk0EREREZEZmEgTEREREZmBiTQRERERkRmYSBMRERERmYGJNBERERGRGZhIExERERGZgYk0EdED5tdff4VEIoFGo7F0KDp1dXWYNGkSbGxsEBwcfNf9H8Q5/a82b96MmTNn6q7j4+Mxe/Zsk+03bNiAqKioPoiMiPoKE2kiotvEx8dDIpEgLy9Pr3z37t0m/w19f7dixQrY29ujvr4eBw4csHQ4KCwshJOTk8Xu397ejszMTGRkZPS6T2JiIiorK3Ho0KH7GBkR9SUm0kRERtjY2EChUKC1tdXSodwzHR0dZvfVarV45pln4OXlBRcXl3sYlWV1dXWhu7v7rvvt3LkTQ4YMweTJk3vdx9raGvPmzcP69evv+n5E9GBiIk1EZMT06dPh7u6OVatWmWyTlZVlsM1h7dq1GDVqlO761p/7c3NzIZVK4eTkhOzsbHR2diI1NRXOzs7w9PTEli1bDMavq6uDTCaDjY0NAgICoFar9epPnTqFF154AUOGDIFUKsUbb7yBixcv6uqnTp2K5ORkLF26FK6urpgxY4bReXR3d2PlypXw9PSEtbU1goODUVJSoquXSCSoqqrCypUrIZFIkJWVZXIchUKBMWPGwNraGiNHjkROTo7RtsZWlG9f8T9x4gSmTZsGBwcHDB06FBMmTMDx48ehVquxYMECXLlyBRKJRC+mjo4OLFu2DI888gjs7e0xceJEved267779u2Dv78/rK2t0dTUBLVajaeeegr29vZwcnJCWFgYmpqajMYOANu3b+9xm0ZVVRWGDx+u9wyioqKwe/du3Lhx4459iejhwESaiMgIKysr5ObmYv369fjtt9/+p7FUKhXOnz+PsrIyrFmzBllZWXjppZcwbNgwHDt2DIsWLcKiRYtw7tw5vX6pqamQy+Worq6GTCZDVFQULl26BABobm5GeHg4goODcfz4cZSUlOCPP/5AdHS03hhfffUVBg4ciPLycmzcuNFofOvWrcMnn3yCjz/+GDU1NYiMjERUVBTOnj2ru1dAQADkcjmam5vx3nvvGR0nPT0dCoUCGRkZOHXqFJRKJaRSqdnPLTY2Fp6enqisrERVVRXS0tIwaNAgyGQyrF27FkOHDkVzc7NeTAsWLEB5eTm2b9+OmpoazJkzB88995xuLgDQ1taGVatW4csvv0RtbS2cnZ0xe/ZshIeHo6amBkeOHMHChQvvuI3n4MGDCA0NNVmvVqsRERGB7OxsfPDBB7ry0NBQ3Lx5ExUVFWY/FyJ6gAgiItIzf/588fLLLwshhJg0aZJISEgQQghRXFws/v1jc8WKFSIoKEiv76effiq8vLz0xvLy8hJdXV26srFjx4rJkyfrrjs7O4W9vb0oKioSQgjR2NgoAIi8vDxdm5s3bwpPT0+hUCiEEEJkZGSImTNn6t373LlzAoCor68XQggRHh4ugoODe5yvh4eHyMnJ0St78sknxdtvv627DgoKEitWrDA5xtWrV4W1tbX44osvjNbfmlN1dbUQQoiCggLh6Oio1+b25+vg4CAKCwuNjmesf0NDg5BIJOL333/XK4+IiBDp6em6fgCERqPR1V+6dEkAEGq12uT8/q21tVUAEGVlZXrlt75vdu/eLRwcHIRSqTTaf9iwYSbnRUQPl4GWS+GJiB58CoUCzz77LORyudljBAQEYMCA//4BUCqVYvz48bprKysruLi4oKWlRa/f008/rft64MCBCA0NxenTpwH8s23ghx9+wJAhQwzup9Vq4evrCwB3XDUFgKtXr+L8+fMICwvTKw8LC8OJEyd6OUPg9OnTaG9vR0RERK/79GTp0qV488038fXXX2P69OmYM2
cORo8ebbL9Tz/9BCGEbu63tLe36+3rHjx4MB5//HHdtbOzM+Lj4xEZGYkZM2Zg+vTpiI6OxogRI4ze59a2DBsbG4O6Y8eOYd++ffjmm2/wyiuvGO1va2uLtrY20xMnoocGt3YQEd3BlClTEBkZieXLlxvUDRgwAEIIvbKbN28atBs0aJDetUQiMVrWmw+93dpu0N3djVmzZkGj0ei9zp49iylTpuja29vb9zjmv8e9RQhxVyeU2Nra9rot0Ltnl5WVhdraWrz44otQqVTw9/dHcXGxyTG7u7thZWWFqqoqvWdy+vRprFu3Ti/W2+dWUFCAI0eOQCaTYceOHfD19cXRo0eN3sfFxQUSicToB1FHjx6NcePGYcuWLSY/3Hn58mW4ubmZnAcRPTyYSBMR9SAvLw/ffvstDh8+rFfu5uaGCxcu6CWE9/Kc5H8ncp2dnaiqqsK4ceMAAE888QRqa2sxatQojBkzRu/V2+QZAIYOHQoPDw+DI9kOHz4MPz+/Xo/j4+MDW1vbXh+N5+bmhmvXruGvv/7SlRl7dr6+vliyZAlKS0vx6quvoqCgAMA/q8pdXV16bUNCQtDV1YWWlhaDZ+Lu7t5jTCEhIUhPT8fhw4cxfvx4KJVKo+0GDx4Mf39/nDp1yqDO1dUVKpUKWq0Wc+fONXhzoNVq8ffffyMkJKTHeIjowcdEmoioB4GBgYiNjTU4tmzq1Kn4888/sXr1ami1WuTn5+O77767Z/fNz89HcXEx6urqkJSUhNbWViQkJAAAkpKScPnyZcTExKCiogK//PILSktLkZCQYJBg9iQ1NRUKhQI7duxAfX090tLSoNFokJKS0usxbGxs8P7772PZsmXYunUrtFotjh49is2bNxttP3HiRNjZ2WH58uVoaGiAUqlEYWGhrv7GjRtITk6GWq1GU1MTysvLUVlZqUvuR40ahevXr+PAgQO4ePEi2tra4Ovri9jYWMTFxWHXrl1obGxEZWUlFAoF9u/fbzL2xsZGpKen48iRI2hqakJpaSnOnDlzxzcSkZGRJs+DHj58OFQqFerq6hATE4POzk5d3cGDB/HYY4/dcYsKET08mEgTEfXChx9+aLAVwc/PD5999hny8/MRFBSEiooKkydamCMvLw8KhQJBQUE4ePAg9uzZA1dXVwCAh4cHysvL0dXVhcjISIwfPx4pKSlwdHTU24/dG++++y7kcjnkcjkCAwNRUlKCvXv3wsfH567GycjIgFwuR2ZmJvz8/DB37lyDfd+3ODs7Y9u2bdi/fz8CAwNRVFSkd6yelZUVLl26hLi4OPj6+iI6OhrPP/88srOzAQAymQyLFi3C3Llz4ebmhtWrVwP4Z4tGXFwc5HI5xo4di6ioKBw7dgyPPvqoybjt7OxQV1eH1157Db6+vli4cCGSk5Px1ltvmeyTmJiI/fv348qVK0br3d3doVKpcPLkScTGxure3BQVFSExMfGOz5GIHh4ScftvBiIiIupRdHS0bjtIb/z888+IiIjAmTNn4OjoeJ+jI6K+wBVpIiIiM3z00UdGT00x5fz589i6dSuTaKJ+hCvSRERERERm4Io0EREREZEZmEgTEREREZmBiTQRERERkRmYSBMRERERmYGJNBERERGRGZhIExERERGZgYk0EREREZEZmEgTEREREZmBiTQRERERkRn+AzcgIk5xqhu1AAAAAElFTkSuQmCC",
|
||
"text/plain": [
|
||
"<Figure size 800x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"from sklearn.cluster import KMeans\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"\n",
|
||
"# Columns of customer_event1 used as clustering inputs.\n",
|
||
"columns_for_clustering = [\n",
|
||
"    \"percent_campaign_opened\",\n",
|
||
"    'nb_tickets',\n",
|
||
"    'average_price',\n",
|
||
"    'nb_purchases',\n",
|
||
"    'average_purchase_delay',\n",
|
||
"    'average_price_basket',\n",
|
||
"    'average_ticket_basket',\n",
|
||
"    'nb_categories',\n",
|
||
"    'nb_suppliers',\n",
|
||
"]\n",
|
||
"\n",
|
||
"# Standardize every feature (zero mean, unit variance) before clustering.\n",
|
||
"scaler = StandardScaler()\n",
|
||
"X = scaler.fit_transform(customer_event1[columns_for_clustering])\n",
|
||
"\n",
|
||
"# Fit k-means for k = 1..10 and record the inertia of each fit.\n",
|
||
"# The same range drives the x-axis of the plot below.\n",
|
||
"k_values = range(1, 11)\n",
|
||
"inertia = []\n",
|
||
"for n_clusters in k_values:\n",
|
||
"    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X)\n",
|
||
"    inertia.append(kmeans.inertia_)\n",
|
||
"\n",
|
||
"# Elbow curve: inertia as a function of the number of clusters.\n",
|
||
"fig, ax = plt.subplots(figsize=(8, 6))\n",
|
||
"ax.plot(k_values, inertia, marker='o', linestyle='-', color='b')\n",
|
||
"ax.set_xlabel('Number of clusters (k)')\n",
|
||
"ax.set_ylabel('Inertia (Within-cluster sum of squares)')\n",
|
||
"ax.set_title('Elbow Method for Optimal k')\n",
|
||
"ax.grid()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"id": "4da7d97e-9128-4e4a-a454-1451d2dfee40",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Cluster 1:\n",
|
||
"percent_campaign_opened 23.552766\n",
|
||
"nb_tickets 4.831229\n",
|
||
"average_price 9.073833\n",
|
||
"nb_purchases 1.178776\n",
|
||
"average_purchase_delay -243.688956\n",
|
||
"average_price_basket 19.548741\n",
|
||
"average_ticket_basket 3.755214\n",
|
||
"nb_categories 1.123118\n",
|
||
"nb_suppliers 1.000000\n",
|
||
"Name: 0, dtype: float64\n",
|
||
"Size: 20590\n",
|
||
"\n",
|
||
"Cluster 2:\n",
|
||
"percent_campaign_opened 35.050398\n",
|
||
"nb_tickets 36.677136\n",
|
||
"average_price 13.146019\n",
|
||
"nb_purchases 4.533920\n",
|
||
"average_purchase_delay 40.550181\n",
|
||
"average_price_basket 54.766752\n",
|
||
"average_ticket_basket 9.094874\n",
|
||
"nb_categories 1.918342\n",
|
||
"nb_suppliers 2.036432\n",
|
||
"Name: 1, dtype: float64\n",
|
||
"Size: 796\n",
|
||
"\n",
|
||
"Cluster 3:\n",
|
||
"percent_campaign_opened 49.430524\n",
|
||
"nb_tickets 9085.000000\n",
|
||
"average_price 1.149821\n",
|
||
"nb_purchases 485.000000\n",
|
||
"average_purchase_delay -3.941335\n",
|
||
"average_price_basket 12.981105\n",
|
||
"average_ticket_basket 38.049343\n",
|
||
"nb_categories 13.500000\n",
|
||
"nb_suppliers 4.500000\n",
|
||
"Name: 2, dtype: float64\n",
|
||
"Size: 2\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Number of clusters for the final model\n",
|
||
"# (presumably read off the elbow curve -- TODO confirm).\n",
|
||
"k = 3\n",
|
||
"\n",
|
||
"# Fit on the standardized matrix X and store each row's label on the frame.\n",
|
||
"kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
||
"customer_event1['cluster'] = kmeans.fit_predict(X)\n",
|
||
"\n",
|
||
"# Per-cluster means of the unscaled features, and number of rows per cluster.\n",
|
||
"grouped = customer_event1.groupby('cluster')\n",
|
||
"cluster_means = grouped[columns_for_clustering].mean()\n",
|
||
"cluster_sizes = customer_event1['cluster'].value_counts()\n",
|
||
"\n",
|
||
"# Headers are printed 1-based; the underlying k-means labels are 0-based.\n",
|
||
"for label in range(k):\n",
|
||
"    profile = cluster_means.loc[label]\n",
|
||
"    print(f\"Cluster {label + 1}:\")\n",
|
||
"    print(profile)\n",
|
||
"    print(f\"Size: {cluster_sizes.loc[label]}\\n\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|