6891 lines
232 KiB
Plaintext
6891 lines
232 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "455cc769-1b3b-4fef-b395-e74a988ceed3",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Notebook Alexis"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 196,
|
||
"id": "20eeb149-6618-4ef2-9cfd-ff062950f36c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import os\n",
|
||
"import s3fs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 197,
|
||
"id": "30494c5e-9649-4fff-8708-617544188b20",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/1',\n",
|
||
" 'bdc2324-data/10',\n",
|
||
" 'bdc2324-data/101',\n",
|
||
" 'bdc2324-data/11',\n",
|
||
" 'bdc2324-data/12',\n",
|
||
" 'bdc2324-data/13',\n",
|
||
" 'bdc2324-data/14',\n",
|
||
" 'bdc2324-data/2',\n",
|
||
" 'bdc2324-data/3',\n",
|
||
" 'bdc2324-data/4',\n",
|
||
" 'bdc2324-data/5',\n",
|
||
" 'bdc2324-data/6',\n",
|
||
" 'bdc2324-data/7',\n",
|
||
" 'bdc2324-data/8',\n",
|
||
" 'bdc2324-data/9']"
|
||
]
|
||
},
|
||
"execution_count": 197,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"BUCKET = \"bdc2324-data\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2feffee9-9f23-4caa-8a01-9e4a93abbf5d",
|
||
"metadata": {},
|
||
"source": [
|
||
"### I. Analyse fichier 8"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f54ba449-2051-4acd-939d-d30abd5452fe",
|
||
"metadata": {},
|
||
"source": [
|
||
"This section describes the databases associated with company 8. "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 198,
|
||
"id": "f1cce705-46e1-42de-8e93-2ee15312d288",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"directory_path = '8'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 199,
|
||
"id": "82d4db0e-0cd5-49af-a4d3-f17f54b1c03c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8campaign_stats.csv\n",
|
||
"bdc2324-data/8/8campaigns.csv\n",
|
||
"bdc2324-data/8/8categories.csv\n",
|
||
"bdc2324-data/8/8countries.csv\n",
|
||
"bdc2324-data/8/8currencies.csv\n",
|
||
"bdc2324-data/8/8customer_target_mappings.csv\n",
|
||
"bdc2324-data/8/8customersplus.csv\n",
|
||
"bdc2324-data/8/8event_types.csv\n",
|
||
"bdc2324-data/8/8events.csv\n",
|
||
"bdc2324-data/8/8facilities.csv\n",
|
||
"bdc2324-data/8/8link_stats.csv\n",
|
||
"bdc2324-data/8/8pricing_formulas.csv\n",
|
||
"bdc2324-data/8/8product_packs.csv\n",
|
||
"bdc2324-data/8/8products.csv\n",
|
||
"bdc2324-data/8/8products_groups.csv\n",
|
||
"bdc2324-data/8/8purchases.csv\n",
|
||
"bdc2324-data/8/8representation_category_capacities.csv\n",
|
||
"bdc2324-data/8/8representations.csv\n",
|
||
"bdc2324-data/8/8seasons.csv\n",
|
||
"bdc2324-data/8/8suppliers.csv\n",
|
||
"bdc2324-data/8/8target_types.csv\n",
|
||
"bdc2324-data/8/8targets.csv\n",
|
||
"bdc2324-data/8/8tickets.csv\n",
|
||
"bdc2324-data/8/8type_of_categories.csv\n",
|
||
"bdc2324-data/8/8type_of_pricing_formulas.csv\n",
|
||
"bdc2324-data/8/8type_ofs.csv\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# check the files in the directory\n",
|
||
"\n",
|
||
"objects = fs.ls(f'{BUCKET}/{directory_path}')\n",
|
||
"\n",
|
||
"for file in objects:\n",
|
||
" print(file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 200,
|
||
"id": "65cb38ad-52ae-4266-85d8-c47d81b00283",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def display_databases(file_name):\n",
|
||
" \"\"\"\n",
|
||
" This function returns the file from s3 storage\n",
|
||
" \"\"\"\n",
|
||
" file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
" print(\"File path : \", file_path)\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" df = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n",
|
||
" print(\"Shape : \", df.shape)\n",
|
||
" return df\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ddd545ef-7e9f-4696-962a-115294991641",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Look at campaigns files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 201,
|
||
"id": "0214d30d-5f83-498f-867f-e67b5793b731",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8campaigns.csv\n",
|
||
"Shape : (1689, 11)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>service_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>process_id</th>\n",
|
||
" <th>report_url</th>\n",
|
||
" <th>category</th>\n",
|
||
" <th>to_be_synced</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>#LOUSFP RELANCE P'TITS LOU</td>\n",
|
||
" <td>1436</td>\n",
|
||
" <td>2022-02-01 15:22:53.564432+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.564432+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>eaa32c96f620053cf442ad32258076b9</td>\n",
|
||
" <td>2022-01-31 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>#LOUSFP BRASSERIE ACHETEURS</td>\n",
|
||
" <td>1435</td>\n",
|
||
" <td>2022-02-01 15:22:53.572592+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.572592+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1f3202d820180a39f736f20fce790de8</td>\n",
|
||
" <td>2022-01-31 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>PRESSE. LOU/SF Paris - RDV et protocole</td>\n",
|
||
" <td>1433</td>\n",
|
||
" <td>2022-02-01 15:22:53.578426+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.578426+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>b069b3415151fa7217e870017374de7c</td>\n",
|
||
" <td>2022-01-31 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>#LOUSFP ÉTUDIANTS</td>\n",
|
||
" <td>1432</td>\n",
|
||
" <td>2022-02-01 15:22:53.584235+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.584235+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>56468d5607a5aaf1604ff5e15593b003</td>\n",
|
||
" <td>2022-01-27 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>#LOUSFP P'TITS LOU</td>\n",
|
||
" <td>1431</td>\n",
|
||
" <td>2022-02-01 15:22:53.590187+01:00</td>\n",
|
||
" <td>2022-02-01 15:22:53.590187+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>e11943a6031a0e6114ae69c257617980</td>\n",
|
||
" <td>2022-01-27 00:00:00+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name service_id \\\n",
|
||
"0 1 #LOUSFP RELANCE P'TITS LOU 1436 \n",
|
||
"1 2 #LOUSFP BRASSERIE ACHETEURS 1435 \n",
|
||
"2 3 PRESSE. LOU/SF Paris - RDV et protocole 1433 \n",
|
||
"3 4 #LOUSFP ÉTUDIANTS 1432 \n",
|
||
"4 5 #LOUSFP P'TITS LOU 1431 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2022-02-01 15:22:53.564432+01:00 2022-02-01 15:22:53.564432+01:00 \n",
|
||
"1 2022-02-01 15:22:53.572592+01:00 2022-02-01 15:22:53.572592+01:00 \n",
|
||
"2 2022-02-01 15:22:53.578426+01:00 2022-02-01 15:22:53.578426+01:00 \n",
|
||
"3 2022-02-01 15:22:53.584235+01:00 2022-02-01 15:22:53.584235+01:00 \n",
|
||
"4 2022-02-01 15:22:53.590187+01:00 2022-02-01 15:22:53.590187+01:00 \n",
|
||
"\n",
|
||
" process_id report_url category to_be_synced \\\n",
|
||
"0 NaN NaN 0 False \n",
|
||
"1 NaN NaN 0 False \n",
|
||
"2 NaN NaN 0 False \n",
|
||
"3 NaN NaN 0 False \n",
|
||
"4 NaN NaN 0 False \n",
|
||
"\n",
|
||
" identifier sent_at \n",
|
||
"0 eaa32c96f620053cf442ad32258076b9 2022-01-31 00:00:00+01:00 \n",
|
||
"1 1f3202d820180a39f736f20fce790de8 2022-01-31 00:00:00+01:00 \n",
|
||
"2 b069b3415151fa7217e870017374de7c 2022-01-31 00:00:00+01:00 \n",
|
||
"3 56468d5607a5aaf1604ff5e15593b003 2022-01-27 00:00:00+01:00 \n",
|
||
"4 e11943a6031a0e6114ae69c257617980 2022-01-27 00:00:00+01:00 "
|
||
]
|
||
},
|
||
"execution_count": 201,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"campaigns = display_databases(\"8campaigns.csv\")\n",
|
||
"campaigns.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 202,
|
||
"id": "e7982be4-2c42-4a91-be5a-329a999644cc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8campaign_stats.csv\n",
|
||
"Shape : (2527083, 8)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>campaign_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>opened_at</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" <th>delivered_at</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>161410</td>\n",
|
||
" <td>2022-02-02 18:16:07+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:16:08.616899+01:00</td>\n",
|
||
" <td>2022-02-02 17:16:08.623098+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>54228</td>\n",
|
||
" <td>2022-02-02 18:18:11+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:18:12.030260+01:00</td>\n",
|
||
" <td>2022-02-02 17:18:12.036606+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>120794</td>\n",
|
||
" <td>2022-02-02 18:18:58+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:19:00.129697+01:00</td>\n",
|
||
" <td>2022-02-02 17:19:00.134704+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>467025</td>\n",
|
||
" <td>2022-02-02 18:19:33+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:19:34.023492+01:00</td>\n",
|
||
" <td>2022-02-02 17:19:34.027570+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>142106</td>\n",
|
||
" <td>2022-02-02 18:19:35+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2022-02-02 17:19:36.553321+01:00</td>\n",
|
||
" <td>2022-02-02 17:19:36.557473+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id campaign_id customer_id opened_at sent_at \\\n",
|
||
"0 1 5 161410 2022-02-02 18:16:07+01:00 NaN \n",
|
||
"1 2 1 54228 2022-02-02 18:18:11+01:00 NaN \n",
|
||
"2 3 6 120794 2022-02-02 18:18:58+01:00 NaN \n",
|
||
"3 4 3 467025 2022-02-02 18:19:33+01:00 NaN \n",
|
||
"4 5 2 142106 2022-02-02 18:19:35+01:00 NaN \n",
|
||
"\n",
|
||
" delivered_at created_at \\\n",
|
||
"0 NaN 2022-02-02 17:16:08.616899+01:00 \n",
|
||
"1 NaN 2022-02-02 17:18:12.030260+01:00 \n",
|
||
"2 NaN 2022-02-02 17:19:00.129697+01:00 \n",
|
||
"3 NaN 2022-02-02 17:19:34.023492+01:00 \n",
|
||
"4 NaN 2022-02-02 17:19:36.553321+01:00 \n",
|
||
"\n",
|
||
" updated_at \n",
|
||
"0 2022-02-02 17:16:08.623098+01:00 \n",
|
||
"1 2022-02-02 17:18:12.036606+01:00 \n",
|
||
"2 2022-02-02 17:19:00.134704+01:00 \n",
|
||
"3 2022-02-02 17:19:34.027570+01:00 \n",
|
||
"4 2022-02-02 17:19:36.557473+01:00 "
|
||
]
|
||
},
|
||
"execution_count": 202,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"campaign_stats = display_databases(\"8campaign_stats.csv\")\n",
|
||
"campaign_stats.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e6512bc9-91f5-4fe4-a637-a4e84dc497a9",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Look at links files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "28e7c1fe-470f-4d84-87b8-a711a973500b",
|
||
"metadata": {},
|
||
"source": [
|
||
"There is no links file for this company, only the link_stats file."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 203,
|
||
"id": "e973575b-4ed6-4b23-8024-f383ac82e87c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8link_stats.csv\n",
|
||
"Shape : (108461, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>clicked_at</th>\n",
|
||
" <th>link_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-02-02 18:33:17+01:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>62137</td>\n",
|
||
" <td>2022-02-02 17:33:19.237759+01:00</td>\n",
|
||
" <td>2022-02-02 17:33:19.237759+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2022-02-02 18:33:26+01:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>556048</td>\n",
|
||
" <td>2022-02-02 17:33:28.101943+01:00</td>\n",
|
||
" <td>2022-02-02 17:33:28.101943+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2022-02-02 18:33:49+01:00</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>194456</td>\n",
|
||
" <td>2022-02-02 17:33:50.595125+01:00</td>\n",
|
||
" <td>2022-02-02 17:33:50.595125+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2022-02-02 18:34:19+01:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>194456</td>\n",
|
||
" <td>2022-02-02 17:34:20.493986+01:00</td>\n",
|
||
" <td>2022-02-02 17:34:20.493986+01:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2022-02-02 18:34:21+01:00</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>21571</td>\n",
|
||
" <td>2022-02-02 17:34:22.300427+01:00</td>\n",
|
||
" <td>2022-02-02 17:34:22.300427+01:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id clicked_at link_id customer_id \\\n",
|
||
"0 1 2022-02-02 18:33:17+01:00 1 62137 \n",
|
||
"1 2 2022-02-02 18:33:26+01:00 1 556048 \n",
|
||
"2 3 2022-02-02 18:33:49+01:00 2 194456 \n",
|
||
"3 4 2022-02-02 18:34:19+01:00 1 194456 \n",
|
||
"4 5 2022-02-02 18:34:21+01:00 2 21571 \n",
|
||
"\n",
|
||
" created_at updated_at \n",
|
||
"0 2022-02-02 17:33:19.237759+01:00 2022-02-02 17:33:19.237759+01:00 \n",
|
||
"1 2022-02-02 17:33:28.101943+01:00 2022-02-02 17:33:28.101943+01:00 \n",
|
||
"2 2022-02-02 17:33:50.595125+01:00 2022-02-02 17:33:50.595125+01:00 \n",
|
||
"3 2022-02-02 17:34:20.493986+01:00 2022-02-02 17:34:20.493986+01:00 \n",
|
||
"4 2022-02-02 17:34:22.300427+01:00 2022-02-02 17:34:22.300427+01:00 "
|
||
]
|
||
},
|
||
"execution_count": 203,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"links_stats = display_databases(\"8link_stats.csv\")\n",
|
||
"links_stats.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "8dfcca1f-1323-413f-aa8d-3ee5ce2610a8",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyse Customersplus file"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 204,
|
||
"id": "3b523575-c779-451c-a12e-a36fb4ad232c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8customersplus.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_456/2210053343.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" customersplus = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>lastname</th>\n",
|
||
" <th>firstname</th>\n",
|
||
" <th>birthdate</th>\n",
|
||
" <th>email</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>civility</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>preferred_category</th>\n",
|
||
" <th>preferred_supplier</th>\n",
|
||
" <th>preferred_formula</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>last_visiting_date</th>\n",
|
||
" <th>zipcode</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1411166</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email1411166</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2022-12-19 15:03:39.419371+01:00</td>\n",
|
||
" <td>2022-12-19 15:03:39.419371+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478498</td>\n",
|
||
" <td>lastname478498</td>\n",
|
||
" <td>firstname478498</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email478498</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 18:58:30.259053+02:00</td>\n",
|
||
" <td>2023-06-28 15:25:24.146689+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>473678</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email473678</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 18:44:04.119713+02:00</td>\n",
|
||
" <td>2021-09-17 18:44:04.124204+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>475026</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email475026</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 18:47:28.789618+02:00</td>\n",
|
||
" <td>2021-09-17 18:47:28.793958+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>487146</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>email487146</td>\n",
|
||
" <td>339167</td>\n",
|
||
" <td>2021-09-17 19:10:24.070460+02:00</td>\n",
|
||
" <td>2021-09-17 19:10:24.076033+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1594</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 43 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id lastname firstname birthdate email \\\n",
|
||
"0 1411166 NaN NaN NaN email1411166 \n",
|
||
"1 478498 lastname478498 firstname478498 NaN email478498 \n",
|
||
"2 473678 NaN NaN NaN email473678 \n",
|
||
"3 475026 NaN NaN NaN email475026 \n",
|
||
"4 487146 NaN NaN NaN email487146 \n",
|
||
"\n",
|
||
" street_id created_at \\\n",
|
||
"0 1 2022-12-19 15:03:39.419371+01:00 \n",
|
||
"1 339167 2021-09-17 18:58:30.259053+02:00 \n",
|
||
"2 339167 2021-09-17 18:44:04.119713+02:00 \n",
|
||
"3 339167 2021-09-17 18:47:28.789618+02:00 \n",
|
||
"4 339167 2021-09-17 19:10:24.070460+02:00 \n",
|
||
"\n",
|
||
" updated_at civility is_partner ... \\\n",
|
||
"0 2022-12-19 15:03:39.419371+01:00 NaN False ... \n",
|
||
"1 2023-06-28 15:25:24.146689+02:00 NaN False ... \n",
|
||
"2 2021-09-17 18:44:04.124204+02:00 NaN False ... \n",
|
||
"3 2021-09-17 18:47:28.793958+02:00 NaN False ... \n",
|
||
"4 2021-09-17 19:10:24.076033+02:00 NaN False ... \n",
|
||
"\n",
|
||
" preferred_category preferred_supplier preferred_formula purchase_count \\\n",
|
||
"0 NaN NaN NaN 0 \n",
|
||
"1 NaN NaN NaN 0 \n",
|
||
"2 NaN NaN NaN 0 \n",
|
||
"3 NaN NaN NaN 0 \n",
|
||
"4 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" first_buying_date last_visiting_date zipcode country age tenant_id \n",
|
||
"0 NaN NaN NaN fr NaN 1594 \n",
|
||
"1 NaN NaN NaN NaN NaN 1594 \n",
|
||
"2 NaN NaN NaN NaN NaN 1594 \n",
|
||
"3 NaN NaN NaN NaN NaN 1594 \n",
|
||
"4 NaN NaN NaN NaN NaN 1594 \n",
|
||
"\n",
|
||
"[5 rows x 43 columns]"
|
||
]
|
||
},
|
||
"execution_count": 204,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8customersplus.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" customersplus = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"customersplus.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fe56785a-ed3c-4322-aafa-a630f97b836f",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyse Structures files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 205,
|
||
"id": "87d801fc-d19a-4c45-9b21-9b6d7a8451fd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8structures.csv\n",
|
||
"No structures database\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8structures.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"try:\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" structures = pd.read_csv(file_in, sep=\",\")\n",
|
||
"except:\n",
|
||
" print(\"No structures database\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b8452558-2d32-459b-91e7-f6042345e465",
|
||
"metadata": {},
|
||
"source": [
|
||
"For Stade Français, there are no structures, tags or structure_tag_mapping databases."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "285b1422-9ca9-4afd-b752-777a54aaa677",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze Target databases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 206,
|
||
"id": "b6e4c3ea-5ccf-4aec-bd2d-79a5a1194178",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8customer_target_mappings.csv\n",
|
||
"Shape : (1449147, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>460062</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>2021-09-17 20:20:24.562734+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.562734+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>460056</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>2021-09-17 20:20:24.610139+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.610139+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>460051</td>\n",
|
||
" <td>65</td>\n",
|
||
" <td>2021-09-17 20:20:24.641381+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.641381+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>460051</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>2021-09-17 20:20:24.672238+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.672238+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>460049</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>2021-09-17 20:20:24.703110+02:00</td>\n",
|
||
" <td>2021-09-17 20:20:24.703110+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_id created_at \\\n",
|
||
"0 1 460062 68 2021-09-17 20:20:24.562734+02:00 \n",
|
||
"1 2 460056 68 2021-09-17 20:20:24.610139+02:00 \n",
|
||
"2 3 460051 65 2021-09-17 20:20:24.641381+02:00 \n",
|
||
"3 4 460051 66 2021-09-17 20:20:24.672238+02:00 \n",
|
||
"4 5 460049 71 2021-09-17 20:20:24.703110+02:00 \n",
|
||
"\n",
|
||
" updated_at name extra_field \n",
|
||
"0 2021-09-17 20:20:24.562734+02:00 NaN NaN \n",
|
||
"1 2021-09-17 20:20:24.610139+02:00 NaN NaN \n",
|
||
"2 2021-09-17 20:20:24.641381+02:00 NaN NaN \n",
|
||
"3 2021-09-17 20:20:24.672238+02:00 NaN NaN \n",
|
||
"4 2021-09-17 20:20:24.703110+02:00 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 206,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8customer_target_mappings.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"try:\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" customer_targets = pd.read_csv(file_in, sep=\",\")\n",
|
||
" \n",
|
||
"except:\n",
|
||
" print(\"No such database in s3\")\n",
|
||
"\n",
|
||
"print(\"Shape : \", customer_targets.shape)\n",
|
||
"customer_targets.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 207,
|
||
"id": "6e81a35c-3c6f-403d-9ebd-e8399ecd4263",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8targets.csv\n",
|
||
"Shape : (331, 5)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>target_type_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>ÉTUDIANTS (OPÉ PANIERS) 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.879995+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.879995+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>EFFECTIF + STAFF 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.894758+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.894758+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Acheteurs LOU / USAP</td>\n",
|
||
" <td>2021-09-17 18:10:40.911969+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.911969+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Liste Compensation 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.928796+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.928796+02:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Partenaires 21-22</td>\n",
|
||
" <td>2021-09-17 18:10:40.945476+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.945476+02:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id target_type_id name \\\n",
|
||
"0 1 1 ÉTUDIANTS (OPÉ PANIERS) 21-22 \n",
|
||
"1 2 1 EFFECTIF + STAFF 21-22 \n",
|
||
"2 3 1 Acheteurs LOU / USAP \n",
|
||
"3 4 1 Liste Compensation 21-22 \n",
|
||
"4 5 1 Partenaires 21-22 \n",
|
||
"\n",
|
||
" created_at updated_at \n",
|
||
"0 2021-09-17 18:10:40.879995+02:00 2021-09-17 18:10:40.879995+02:00 \n",
|
||
"1 2021-09-17 18:10:40.894758+02:00 2021-09-17 18:10:40.894758+02:00 \n",
|
||
"2 2021-09-17 18:10:40.911969+02:00 2021-09-17 18:10:40.911969+02:00 \n",
|
||
"3 2021-09-17 18:10:40.928796+02:00 2021-09-17 18:10:40.928796+02:00 \n",
|
||
"4 2021-09-17 18:10:40.945476+02:00 2021-09-17 18:10:40.945476+02:00 "
|
||
]
|
||
},
|
||
"execution_count": 207,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8targets.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"\n",
|
||
"# Read the targets table from S3 into a DataFrame.\n",
|
||
"try:\n",
|
||
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
"        targets = pd.read_csv(file_in, sep=\",\")\n",
|
||
"except FileNotFoundError:\n",
|
||
"    # Only a missing object means the database is absent. Re-raise so the cell\n",
|
||
"    # stops here instead of failing below with a NameError, or silently\n",
|
||
"    # previewing a stale targets frame left over from an earlier run.\n",
|
||
"    print(\"No such database in s3\")\n",
|
||
"    raise\n",
|
||
"\n",
|
||
"print(\"Shape : \", targets.shape)\n",
|
||
"targets.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 208,
|
||
"id": "85696d74-3b2f-4368-9045-44db5322b60d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bdc2324-data/8/8target_types.csv\n",
|
||
"Shape : (4, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>is_import</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" <td>2021-09-17 18:10:40.864320+02:00</td>\n",
|
||
" <td>2021-09-17 18:10:40.864320+02:00</td>\n",
|
||
" <td>e34e3aa838a6eb4c41df6ed4444b796a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_dynamic_filter</td>\n",
|
||
" <td>2022-03-09 14:41:45.695407+01:00</td>\n",
|
||
" <td>2022-03-09 14:41:45.695407+01:00</td>\n",
|
||
" <td>e0f4b8693184850fefd6d2a38f10584e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" <td>2022-04-01 17:02:49.588910+02:00</td>\n",
|
||
" <td>2022-04-01 17:02:49.588910+02:00</td>\n",
|
||
" <td>fb27e81baa4debc6a4e1a8639c20e808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>manual_import</td>\n",
|
||
" <td>2022-05-06 14:26:01.923160+02:00</td>\n",
|
||
" <td>2022-05-06 14:26:01.923160+02:00</td>\n",
|
||
" <td>12213df2ce68a624e4c0070521437bac</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id is_import name created_at \\\n",
|
||
"0 1 NaN manual_static_filter 2021-09-17 18:10:40.864320+02:00 \n",
|
||
"1 2 False manual_dynamic_filter 2022-03-09 14:41:45.695407+01:00 \n",
|
||
"2 3 False manual_static_filter 2022-04-01 17:02:49.588910+02:00 \n",
|
||
"3 4 True manual_import 2022-05-06 14:26:01.923160+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-09-17 18:10:40.864320+02:00 e34e3aa838a6eb4c41df6ed4444b796a \n",
|
||
"1 2022-03-09 14:41:45.695407+01:00 e0f4b8693184850fefd6d2a38f10584e \n",
|
||
"2 2022-04-01 17:02:49.588910+02:00 fb27e81baa4debc6a4e1a8639c20e808 \n",
|
||
"3 2022-05-06 14:26:01.923160+02:00 12213df2ce68a624e4c0070521437bac "
|
||
]
|
||
},
|
||
"execution_count": 208,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"file_name = \"8target_types.csv\"\n",
|
||
"file_path = BUCKET + \"/\" + directory_path + \"/\" + file_name\n",
|
||
"print(file_path)\n",
|
||
"\n",
|
||
"# Read the target types table from S3 into a DataFrame.\n",
|
||
"try:\n",
|
||
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
"        target_types = pd.read_csv(file_in, sep=\",\")\n",
|
||
"except FileNotFoundError:\n",
|
||
"    # Only a missing object means the database is absent. Re-raise so the cell\n",
|
||
"    # stops here instead of failing below with a NameError, or silently\n",
|
||
"    # previewing a stale target_types frame left over from an earlier run.\n",
|
||
"    print(\"No such database in s3\")\n",
|
||
"    raise\n",
|
||
"\n",
|
||
"print(\"Shape : \", target_types.shape)\n",
|
||
"target_types.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "cdc6416b-3deb-446c-8957-435745b93533",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze consumption files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f8622bd5-a5ab-403f-ab01-758aec879ee4",
|
||
"metadata": {},
|
||
"source": [
|
||
"This group covers consumptions.csv, suppliers.csv, tickets.csv and purchases.csv.\n",
|
||
"\n",
|
||
"However, company 8 has no consumptions.csv file, so only purchases, tickets and suppliers are loaded below."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 209,
|
||
"id": "7c57529b-2ffb-4039-9795-b27c6fbd54a4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8purchases.csv\n",
|
||
"Shape : (975703, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>number</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>119609</td>\n",
|
||
" <td>2017-09-09 15:39:45.913000+02:00</td>\n",
|
||
" <td>1149</td>\n",
|
||
" <td>2021-06-29 21:52:21.816195+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:21.816195+02:00</td>\n",
|
||
" <td>193416</td>\n",
|
||
" <td>f2956e2d53321317e7c15c1cb992156c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>119610</td>\n",
|
||
" <td>2017-09-09 15:39:46.033000+02:00</td>\n",
|
||
" <td>1149</td>\n",
|
||
" <td>2021-06-29 21:52:21.817846+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:21.817846+02:00</td>\n",
|
||
" <td>193416</td>\n",
|
||
" <td>faabab441b2668a85bb484490b2166c3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>5464</td>\n",
|
||
" <td>2017-07-24 19:44:11.923000+02:00</td>\n",
|
||
" <td>1251</td>\n",
|
||
" <td>2021-06-29 21:33:45.604224+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:45.604224+02:00</td>\n",
|
||
" <td>184354</td>\n",
|
||
" <td>f63c69fa585ce4f91681f0d9ebeb770f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>119613</td>\n",
|
||
" <td>2017-09-10 11:25:45.820000+02:00</td>\n",
|
||
" <td>12558</td>\n",
|
||
" <td>2021-06-29 21:52:21.822033+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:21.822033+02:00</td>\n",
|
||
" <td>193462</td>\n",
|
||
" <td>ffce5fd8d2348eb6885d0ee9c7bd017c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1422860</td>\n",
|
||
" <td>2018-10-08 10:30:42.980000+02:00</td>\n",
|
||
" <td>17935</td>\n",
|
||
" <td>2021-07-16 04:20:55.347369+02:00</td>\n",
|
||
" <td>2021-07-16 04:20:55.347369+02:00</td>\n",
|
||
" <td>247459</td>\n",
|
||
" <td>193e41eae8ee078537107a569c0426ef</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id purchase_date customer_id \\\n",
|
||
"0 119609 2017-09-09 15:39:45.913000+02:00 1149 \n",
|
||
"1 119610 2017-09-09 15:39:46.033000+02:00 1149 \n",
|
||
"2 5464 2017-07-24 19:44:11.923000+02:00 1251 \n",
|
||
"3 119613 2017-09-10 11:25:45.820000+02:00 12558 \n",
|
||
"4 1422860 2018-10-08 10:30:42.980000+02:00 17935 \n",
|
||
"\n",
|
||
" created_at updated_at number \\\n",
|
||
"0 2021-06-29 21:52:21.816195+02:00 2021-06-29 21:52:21.816195+02:00 193416 \n",
|
||
"1 2021-06-29 21:52:21.817846+02:00 2021-06-29 21:52:21.817846+02:00 193416 \n",
|
||
"2 2021-06-29 21:33:45.604224+02:00 2021-06-29 21:33:45.604224+02:00 184354 \n",
|
||
"3 2021-06-29 21:52:21.822033+02:00 2021-06-29 21:52:21.822033+02:00 193462 \n",
|
||
"4 2021-07-16 04:20:55.347369+02:00 2021-07-16 04:20:55.347369+02:00 247459 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 f2956e2d53321317e7c15c1cb992156c \n",
|
||
"1 faabab441b2668a85bb484490b2166c3 \n",
|
||
"2 f63c69fa585ce4f91681f0d9ebeb770f \n",
|
||
"3 ffce5fd8d2348eb6885d0ee9c7bd017c \n",
|
||
"4 193e41eae8ee078537107a569c0426ef "
|
||
]
|
||
},
|
||
"execution_count": 209,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"purchases = display_databases(\"8purchases.csv\")\n",
|
||
"purchases.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 210,
|
||
"id": "903321fb-99f8-475d-b4a6-c70ec2efe190",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8tickets.csv\n",
|
||
"Shape : (2370152, 11)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>number</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>product_id</th>\n",
|
||
" <th>is_from_subscription</th>\n",
|
||
" <th>type_of</th>\n",
|
||
" <th>supplier_id</th>\n",
|
||
" <th>barcode</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>254164</td>\n",
|
||
" <td>193416_763837_650_688_326212</td>\n",
|
||
" <td>2021-06-29 21:53:14.951871+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.951871+02:00</td>\n",
|
||
" <td>119609</td>\n",
|
||
" <td>3334</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9ec3b5617fc54512acf131aa5fa26870</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>254165</td>\n",
|
||
" <td>193416_763838_650_688_326236</td>\n",
|
||
" <td>2021-06-29 21:53:14.953717+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.953717+02:00</td>\n",
|
||
" <td>119610</td>\n",
|
||
" <td>3334</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>b227c664e2574a919672683f5cc4c98e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>254168</td>\n",
|
||
" <td>193462_763921_649_687_305676</td>\n",
|
||
" <td>2021-06-29 21:53:14.958207+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.958207+02:00</td>\n",
|
||
" <td>119613</td>\n",
|
||
" <td>3432</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>28ac507ad84a30993bdfc0996fd2476b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>254169</td>\n",
|
||
" <td>193462_763922_649_687_305653</td>\n",
|
||
" <td>2021-06-29 21:53:14.959681+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.959681+02:00</td>\n",
|
||
" <td>119614</td>\n",
|
||
" <td>3268</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>131dbaeef23f5ac2271bf0266ce35476</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>254170</td>\n",
|
||
" <td>193462_763923_649_687_305630</td>\n",
|
||
" <td>2021-06-29 21:53:14.961157+02:00</td>\n",
|
||
" <td>2021-06-29 21:53:14.961157+02:00</td>\n",
|
||
" <td>119615</td>\n",
|
||
" <td>3268</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1a6342ad2c213b626aa55e5374cd661a</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id number created_at \\\n",
|
||
"0 254164 193416_763837_650_688_326212 2021-06-29 21:53:14.951871+02:00 \n",
|
||
"1 254165 193416_763838_650_688_326236 2021-06-29 21:53:14.953717+02:00 \n",
|
||
"2 254168 193462_763921_649_687_305676 2021-06-29 21:53:14.958207+02:00 \n",
|
||
"3 254169 193462_763922_649_687_305653 2021-06-29 21:53:14.959681+02:00 \n",
|
||
"4 254170 193462_763923_649_687_305630 2021-06-29 21:53:14.961157+02:00 \n",
|
||
"\n",
|
||
" updated_at purchase_id product_id \\\n",
|
||
"0 2021-06-29 21:53:14.951871+02:00 119609 3334 \n",
|
||
"1 2021-06-29 21:53:14.953717+02:00 119610 3334 \n",
|
||
"2 2021-06-29 21:53:14.958207+02:00 119613 3432 \n",
|
||
"3 2021-06-29 21:53:14.959681+02:00 119614 3268 \n",
|
||
"4 2021-06-29 21:53:14.961157+02:00 119615 3268 \n",
|
||
"\n",
|
||
" is_from_subscription type_of supplier_id barcode \\\n",
|
||
"0 False 1 2 NaN \n",
|
||
"1 False 1 2 NaN \n",
|
||
"2 False 1 2 NaN \n",
|
||
"3 False 1 2 NaN \n",
|
||
"4 False 1 2 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 9ec3b5617fc54512acf131aa5fa26870 \n",
|
||
"1 b227c664e2574a919672683f5cc4c98e \n",
|
||
"2 28ac507ad84a30993bdfc0996fd2476b \n",
|
||
"3 131dbaeef23f5ac2271bf0266ce35476 \n",
|
||
"4 1a6342ad2c213b626aa55e5374cd661a "
|
||
]
|
||
},
|
||
"execution_count": 210,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tickets = display_databases(\"8tickets.csv\")\n",
|
||
"tickets.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 211,
|
||
"id": "243e6942-0233-4cd5-b32b-e005457131d2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8suppliers.csv\n",
|
||
"Shape : (16, 9)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>manually_added</th>\n",
|
||
" <th>label</th>\n",
|
||
" <th>itr</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>commission</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>152</td>\n",
|
||
" <td>plateformeceweb</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-07-16 00:02:17.805193+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.805193+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0fc934f49bfa9f1f4e6ab7e2593b6839</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>accreditation annuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-06-29 21:33:14.138349+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.138349+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fe13238540e0ff293ec8aad29aeae6c3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>68</td>\n",
|
||
" <td>abonnement parking</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-06-29 22:10:31.167367+02:00</td>\n",
|
||
" <td>2021-06-29 22:10:31.167367+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0f7defc52a97cdca533af74f4e6e5b1e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>accreditation match</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-06-29 21:33:14.142084+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.142084+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>40e19a7c4824eaad298e0107ed7e3691</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>154</td>\n",
|
||
" <td>web lnr-lou</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-07-16 00:02:17.806521+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.806521+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>b144dd617807b02e0d9002fac6c61768</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name manually_added label itr \\\n",
|
||
"0 152 plateformeceweb False NaN NaN \n",
|
||
"1 6 accreditation annuelle False NaN NaN \n",
|
||
"2 68 abonnement parking False NaN NaN \n",
|
||
"3 9 accreditation match False NaN NaN \n",
|
||
"4 154 web lnr-lou False NaN NaN \n",
|
||
"\n",
|
||
" updated_at created_at \\\n",
|
||
"0 2021-07-16 00:02:17.805193+02:00 2021-07-16 00:02:17.805193+02:00 \n",
|
||
"1 2021-06-29 21:33:14.138349+02:00 2021-06-29 21:33:14.138349+02:00 \n",
|
||
"2 2021-06-29 22:10:31.167367+02:00 2021-06-29 22:10:31.167367+02:00 \n",
|
||
"3 2021-06-29 21:33:14.142084+02:00 2021-06-29 21:33:14.142084+02:00 \n",
|
||
"4 2021-07-16 00:02:17.806521+02:00 2021-07-16 00:02:17.806521+02:00 \n",
|
||
"\n",
|
||
" commission identifier \n",
|
||
"0 NaN 0fc934f49bfa9f1f4e6ab7e2593b6839 \n",
|
||
"1 NaN fe13238540e0ff293ec8aad29aeae6c3 \n",
|
||
"2 NaN 0f7defc52a97cdca533af74f4e6e5b1e \n",
|
||
"3 NaN 40e19a7c4824eaad298e0107ed7e3691 \n",
|
||
"4 NaN b144dd617807b02e0d9002fac6c61768 "
|
||
]
|
||
},
|
||
"execution_count": 211,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"suppliers = display_databases(\"8suppliers.csv\")\n",
|
||
"suppliers.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fd8c876a-f0c5-4123-a422-c267af5f29b1",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze product file"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 212,
|
||
"id": "6b82efce-1dee-4d89-8585-28c4ad477eef",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8products.csv\n",
|
||
"Shape : (45411, 14)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>90013</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1961</td>\n",
|
||
" <td>912</td>\n",
|
||
" <td>2021-07-16 04:56:05.797551+02:00</td>\n",
|
||
" <td>2021-07-16 04:56:05.797551+02:00</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>87917</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>476e111175b1660688b7c13dade2b57e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>662</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>2021-06-29 21:33:17.389201+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:17.389201+02:00</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>640</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2c765698e9bedd48e8a3fd27dc8dbc97</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>646</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2021-06-29 21:33:17.366742+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:17.366742+02:00</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>624</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4e719148651fd7f175e3fb51bdb5d31b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5703</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>188</td>\n",
|
||
" <td>2021-06-29 21:52:09.374365+02:00</td>\n",
|
||
" <td>2021-06-29 21:52:09.374365+02:00</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5540</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>e4d7beeb0a631e2e51e61951623ba9b1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>648</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>49</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2021-06-29 21:33:17.369471+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:17.369471+02:00</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>626</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>07a5dd9e125345b9458651ab73605255</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id amount is_full_price representation_id pricing_formula_id \\\n",
|
||
"0 90013 0.0 False 1961 912 \n",
|
||
"1 662 0.0 False 11 29 \n",
|
||
"2 646 0.0 False 46 10 \n",
|
||
"3 5703 5.0 False 7 188 \n",
|
||
"4 648 0.0 False 49 10 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2021-07-16 04:56:05.797551+02:00 2021-07-16 04:56:05.797551+02:00 \n",
|
||
"1 2021-06-29 21:33:17.389201+02:00 2021-06-29 21:33:17.389201+02:00 \n",
|
||
"2 2021-06-29 21:33:17.366742+02:00 2021-06-29 21:33:17.366742+02:00 \n",
|
||
"3 2021-06-29 21:52:09.374365+02:00 2021-06-29 21:52:09.374365+02:00 \n",
|
||
"4 2021-06-29 21:33:17.369471+02:00 2021-06-29 21:33:17.369471+02:00 \n",
|
||
"\n",
|
||
" category_id apply_price products_group_id product_pack_id extra_field \\\n",
|
||
"0 34 0.0 87917 1 NaN \n",
|
||
"1 16 0.0 640 1 NaN \n",
|
||
"2 15 0.0 624 1 NaN \n",
|
||
"3 4 0.0 5540 1 NaN \n",
|
||
"4 15 0.0 626 1 NaN \n",
|
||
"\n",
|
||
" amount_consumption identifier \n",
|
||
"0 NaN 476e111175b1660688b7c13dade2b57e \n",
|
||
"1 NaN 2c765698e9bedd48e8a3fd27dc8dbc97 \n",
|
||
"2 NaN 4e719148651fd7f175e3fb51bdb5d31b \n",
|
||
"3 NaN e4d7beeb0a631e2e51e61951623ba9b1 \n",
|
||
"4 NaN 07a5dd9e125345b9458651ab73605255 "
|
||
]
|
||
},
|
||
"execution_count": 212,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products = display_databases(\"8products.csv\")\n",
|
||
"products.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "8ad143b2-2869-4bd2-982e-688498b98727",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze pricing files"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9a54e9a5-801d-4000-9e76-e792edbf7e41",
|
||
"metadata": {},
|
||
"source": [
|
||
"This group covers pricing_formulas.csv and type_of_pricing_formulas.csv."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 213,
|
||
"id": "daf37bff-a26d-4ff5-ad50-c90f917164bd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8pricing_formulas.csv\n",
|
||
"Shape : (516, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>visite stade enfant</td>\n",
|
||
" <td>2021-06-29 21:33:14.160728+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.160728+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>bbc80e5761a0ea325f6f6a5411752659</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>3229</td>\n",
|
||
" <td>tarif bloc etudiants</td>\n",
|
||
" <td>2021-07-16 04:20:46.684601+02:00</td>\n",
|
||
" <td>2021-09-03 16:44:46.096785+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>205122cc7e96d559330972b0ec0cf35a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>42</td>\n",
|
||
" <td>invitation eiffage</td>\n",
|
||
" <td>2021-06-29 21:33:14.204483+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.204483+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>e4e6365c02e2a7b01ebe2ce8ace624f2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4379</td>\n",
|
||
" <td>invitation offre speciale</td>\n",
|
||
" <td>2021-07-16 05:21:44.984893+02:00</td>\n",
|
||
" <td>2021-07-16 05:21:44.984893+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>307817b6205535a35915a64027ee161e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2641</td>\n",
|
||
" <td>prevente reabo enfant</td>\n",
|
||
" <td>2021-07-16 03:47:40.896805+02:00</td>\n",
|
||
" <td>2021-09-03 16:08:35.304298+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>478eb63c71ba35d8d3d64c8637dafdee</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 7 visite stade enfant 2021-06-29 21:33:14.160728+02:00 \n",
|
||
"1 3229 tarif bloc etudiants 2021-07-16 04:20:46.684601+02:00 \n",
|
||
"2 42 invitation eiffage 2021-06-29 21:33:14.204483+02:00 \n",
|
||
"3 4379 invitation offre speciale 2021-07-16 05:21:44.984893+02:00 \n",
|
||
"4 2641 prevente reabo enfant 2021-07-16 03:47:40.896805+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field \\\n",
|
||
"0 2021-06-29 21:33:14.160728+02:00 NaN \n",
|
||
"1 2021-09-03 16:44:46.096785+02:00 NaN \n",
|
||
"2 2021-06-29 21:33:14.204483+02:00 NaN \n",
|
||
"3 2021-07-16 05:21:44.984893+02:00 NaN \n",
|
||
"4 2021-09-03 16:08:35.304298+02:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 bbc80e5761a0ea325f6f6a5411752659 \n",
|
||
"1 205122cc7e96d559330972b0ec0cf35a \n",
|
||
"2 e4e6365c02e2a7b01ebe2ce8ace624f2 \n",
|
||
"3 307817b6205535a35915a64027ee161e \n",
|
||
"4 478eb63c71ba35d8d3d64c8637dafdee "
|
||
]
|
||
},
|
||
"execution_count": 213,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pricing_formulas = display_databases(\"8pricing_formulas.csv\")\n",
|
||
"pricing_formulas.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 214,
|
||
"id": "cdb14488-b093-4b39-84fa-1c2b4576208f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8type_of_pricing_formulas.csv\n",
|
||
"Shape : (103, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>1021</td>\n",
|
||
" <td>2021-09-03 14:17:19.816110+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.816110+02:00</td>\n",
|
||
" <td>41047fbeb7cd3e1cb2713c608d2f786d</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>4305</td>\n",
|
||
" <td>2021-09-03 14:17:19.848088+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.848088+02:00</td>\n",
|
||
" <td>a62a4dad7d62738129244bbb5ede0747</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>4306</td>\n",
|
||
" <td>2021-09-03 14:17:19.864067+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.864067+02:00</td>\n",
|
||
" <td>c3770373e09f55412068c447736d9da3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>2021-09-03 14:17:19.880078+02:00</td>\n",
|
||
" <td>2021-09-03 14:17:19.880078+02:00</td>\n",
|
||
" <td>7b7b1242ae7a8c9eb66d35d8a4348ccd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2021-09-03 14:18:03.616081+02:00</td>\n",
|
||
" <td>2021-09-03 14:18:03.616081+02:00</td>\n",
|
||
" <td>0a2b941c46b31258c03b316aa064e86a</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id type_of_id pricing_formula_id created_at \\\n",
|
||
"0 1 7 1021 2021-09-03 14:17:19.816110+02:00 \n",
|
||
"1 2 7 4305 2021-09-03 14:17:19.848088+02:00 \n",
|
||
"2 3 7 4306 2021-09-03 14:17:19.864067+02:00 \n",
|
||
"3 4 7 29 2021-09-03 14:17:19.880078+02:00 \n",
|
||
"4 5 8 10 2021-09-03 14:18:03.616081+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-09-03 14:17:19.816110+02:00 41047fbeb7cd3e1cb2713c608d2f786d \n",
|
||
"1 2021-09-03 14:17:19.848088+02:00 a62a4dad7d62738129244bbb5ede0747 \n",
|
||
"2 2021-09-03 14:17:19.864067+02:00 c3770373e09f55412068c447736d9da3 \n",
|
||
"3 2021-09-03 14:17:19.880078+02:00 7b7b1242ae7a8c9eb66d35d8a4348ccd \n",
|
||
"4 2021-09-03 14:18:03.616081+02:00 0a2b941c46b31258c03b316aa064e86a "
|
||
]
|
||
},
|
||
"execution_count": 214,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type_pricing_formulas = display_databases(\"8type_of_pricing_formulas.csv\")\n",
|
||
"type_pricing_formulas.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a084297a-4fd7-4cda-b513-7704f4244a5c",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze type of products"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "76a67ea7-8720-441e-8973-23e5d105370e",
|
||
"metadata": {},
|
||
"source": [
|
||
"That is, the files categories.csv and type_of_categories.csv."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 215,
|
||
"id": "6582694d-5339-4f33-a943-c73033121a90",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8categories.csv\n",
|
||
"Shape : (148, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>quota</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>653</td>\n",
|
||
" <td>acces village implid</td>\n",
|
||
" <td>2021-07-16 00:04:37.181331+02:00</td>\n",
|
||
" <td>2021-07-16 00:04:37.181331+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>c447d053646a6503d3cd84d4798bf5b7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>805</td>\n",
|
||
" <td>parking organisation</td>\n",
|
||
" <td>2021-07-16 01:54:15.822407+02:00</td>\n",
|
||
" <td>2021-07-16 01:54:15.822407+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>02bf9871964345f505ad305080daec36</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>809</td>\n",
|
||
" <td>rose rouge orange</td>\n",
|
||
" <td>2021-07-16 01:54:15.825345+02:00</td>\n",
|
||
" <td>2021-07-16 01:54:15.825345+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>31fb5b57bc1a2bcd5c155fb0d9e7c0dd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2183</td>\n",
|
||
" <td>2eme catégorie j.b. centrale</td>\n",
|
||
" <td>2021-07-16 04:37:25.446835+02:00</td>\n",
|
||
" <td>2021-07-16 04:37:25.446835+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>c9eb6651caaed42b809b3f4407a847c9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>621</td>\n",
|
||
" <td>acces brasserie</td>\n",
|
||
" <td>2021-07-16 00:02:17.249701+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.249701+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>349e6a59585d78d80d46acbc6a520c50</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 653 acces village implid 2021-07-16 00:04:37.181331+02:00 \n",
|
||
"1 805 parking organisation 2021-07-16 01:54:15.822407+02:00 \n",
|
||
"2 809 rose rouge orange 2021-07-16 01:54:15.825345+02:00 \n",
|
||
"3 2183 2eme catégorie j.b. centrale 2021-07-16 04:37:25.446835+02:00 \n",
|
||
"4 621 acces brasserie 2021-07-16 00:02:17.249701+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field quota \\\n",
|
||
"0 2021-07-16 00:04:37.181331+02:00 NaN NaN \n",
|
||
"1 2021-07-16 01:54:15.822407+02:00 NaN NaN \n",
|
||
"2 2021-07-16 01:54:15.825345+02:00 NaN NaN \n",
|
||
"3 2021-07-16 04:37:25.446835+02:00 NaN NaN \n",
|
||
"4 2021-07-16 00:02:17.249701+02:00 NaN NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 c447d053646a6503d3cd84d4798bf5b7 \n",
|
||
"1 02bf9871964345f505ad305080daec36 \n",
|
||
"2 31fb5b57bc1a2bcd5c155fb0d9e7c0dd \n",
|
||
"3 c9eb6651caaed42b809b3f4407a847c9 \n",
|
||
"4 349e6a59585d78d80d46acbc6a520c50 "
|
||
]
|
||
},
|
||
"execution_count": 215,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"categories = display_databases(\"8categories.csv\")\n",
|
||
"categories.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 216,
|
||
"id": "589076df-1958-42de-9941-1aff9fa8536f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8type_of_categories.csv\n",
|
||
"Shape : (6, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2021-08-20 15:22:05.558209+02:00</td>\n",
|
||
" <td>2021-08-20 15:22:05.558209+02:00</td>\n",
|
||
" <td>af8fa6d57f6b19a7600a69e7771c7c3a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2021-09-02 17:29:32.582002+02:00</td>\n",
|
||
" <td>2021-09-02 17:29:32.582002+02:00</td>\n",
|
||
" <td>63718e7ad306912427758ddf988ad34f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2021-09-02 17:32:38.299733+02:00</td>\n",
|
||
" <td>2021-09-02 17:32:38.299733+02:00</td>\n",
|
||
" <td>5e147d4d90888df14c4584f5c6887c96</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2021-09-02 17:35:04.748993+02:00</td>\n",
|
||
" <td>2021-09-02 17:35:04.748993+02:00</td>\n",
|
||
" <td>a9dfdc3f40b41e3018933c6167fc38a5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>2021-09-02 17:35:37.396740+02:00</td>\n",
|
||
" <td>2021-09-02 17:35:37.396740+02:00</td>\n",
|
||
" <td>c05b0061d2a875adbc35d3dfa6a50a12</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id type_of_id category_id created_at \\\n",
|
||
"0 1 1 2 2021-08-20 15:22:05.558209+02:00 \n",
|
||
"1 2 2 1 2021-09-02 17:29:32.582002+02:00 \n",
|
||
"2 3 3 3 2021-09-02 17:32:38.299733+02:00 \n",
|
||
"3 4 4 4 2021-09-02 17:35:04.748993+02:00 \n",
|
||
"4 5 5 17 2021-09-02 17:35:37.396740+02:00 \n",
|
||
"\n",
|
||
" updated_at identifier \n",
|
||
"0 2021-08-20 15:22:05.558209+02:00 af8fa6d57f6b19a7600a69e7771c7c3a \n",
|
||
"1 2021-09-02 17:29:32.582002+02:00 63718e7ad306912427758ddf988ad34f \n",
|
||
"2 2021-09-02 17:32:38.299733+02:00 5e147d4d90888df14c4584f5c6887c96 \n",
|
||
"3 2021-09-02 17:35:04.748993+02:00 a9dfdc3f40b41e3018933c6167fc38a5 \n",
|
||
"4 2021-09-02 17:35:37.396740+02:00 c05b0061d2a875adbc35d3dfa6a50a12 "
|
||
]
|
||
},
|
||
"execution_count": 216,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type_categories = display_databases(\"8type_of_categories.csv\")\n",
|
||
"type_categories.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3427b681-4c05-4e4e-9c2b-867ee789f98c",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze type of representations"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9381e36b-090a-44c5-a29d-3ac4c9a4431e",
|
||
"metadata": {},
|
||
"source": [
|
||
"That is, the files representation_category_capacities.csv, representations.csv and representation_types.csv.\n",
|
||
"\n",
|
||
"However, there is no representation_types database for company 8."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 217,
|
||
"id": "6f06d72a-5725-4eee-8e4c-e9ef5820f346",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8representation_category_capacities.csv\n",
|
||
"Shape : (7378, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>expected_filling</th>\n",
|
||
" <th>max_filling</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>561</td>\n",
|
||
" <td>2021-06-29 21:33:14.096827+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.096827+02:00</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>571</td>\n",
|
||
" <td>2021-06-29 21:33:14.110047+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:14.110047+02:00</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>9665</td>\n",
|
||
" <td>2021-07-16 00:02:17.736387+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.736387+02:00</td>\n",
|
||
" <td>1887</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>383906</td>\n",
|
||
" <td>2023-03-04 02:55:01.585418+01:00</td>\n",
|
||
" <td>2023-03-04 02:55:01.585418+01:00</td>\n",
|
||
" <td>52729</td>\n",
|
||
" <td>476</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>393</td>\n",
|
||
" <td>2021-06-29 21:33:13.876766+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.876766+02:00</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id created_at updated_at \\\n",
|
||
"0 561 2021-06-29 21:33:14.096827+02:00 2021-06-29 21:33:14.096827+02:00 \n",
|
||
"1 571 2021-06-29 21:33:14.110047+02:00 2021-06-29 21:33:14.110047+02:00 \n",
|
||
"2 9665 2021-07-16 00:02:17.736387+02:00 2021-07-16 00:02:17.736387+02:00 \n",
|
||
"3 383906 2023-03-04 02:55:01.585418+01:00 2023-03-04 02:55:01.585418+01:00 \n",
|
||
"4 393 2021-06-29 21:33:13.876766+02:00 2021-06-29 21:33:13.876766+02:00 \n",
|
||
"\n",
|
||
" representation_id category_id expected_filling max_filling \n",
|
||
"0 17 37 NaN NaN \n",
|
||
"1 14 39 NaN NaN \n",
|
||
"2 1887 8 NaN NaN \n",
|
||
"3 52729 476 NaN NaN \n",
|
||
"4 9 23 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 217,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"representation_category_capacities = display_databases(\"8representation_category_capacities.csv\")\n",
|
||
"representation_category_capacities.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 218,
|
||
"id": "bd405913-033d-4f15-a5b9-103d577baaff",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8representations.csv\n",
|
||
"Shape : (1015, 16)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>serial</th>\n",
|
||
" <th>event_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" <th>satisfaction</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>representation_type_id</th>\n",
|
||
" <th>expected_filling</th>\n",
|
||
" <th>max_filling</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>5903</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5836</td>\n",
|
||
" <td>2021-07-16 05:16:57.419565+02:00</td>\n",
|
||
" <td>2021-07-16 05:16:57.419565+02:00</td>\n",
|
||
" <td>2019-08-24 18:00:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8009c34cae4e79e3781f16f3ceeab244</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>67133</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>65652</td>\n",
|
||
" <td>2023-09-27 02:21:36.573001+02:00</td>\n",
|
||
" <td>2023-09-27 02:21:36.573001+02:00</td>\n",
|
||
" <td>2023-10-04 10:30:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4e9d3fc8d1f7bf563dc586548fe6390e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1874</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1826</td>\n",
|
||
" <td>2021-07-16 00:02:17.390274+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.390274+02:00</td>\n",
|
||
" <td>2019-09-14 18:00:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>19f666370c1fc781dff638c20ae04c8a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5904</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5837</td>\n",
|
||
" <td>2021-07-16 05:16:57.420302+02:00</td>\n",
|
||
" <td>2021-07-16 05:16:57.420302+02:00</td>\n",
|
||
" <td>2019-09-01 17:05:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4221acd3f49179f5d0b292c15d1ab8e4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4165</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4106</td>\n",
|
||
" <td>2021-07-16 03:53:05.929713+02:00</td>\n",
|
||
" <td>2021-07-16 03:53:05.929713+02:00</td>\n",
|
||
" <td>2018-10-14 14:00:00+02:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>733104286519c0614b2d45470eb180a1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id serial event_id created_at \\\n",
|
||
"0 5903 NaN 5836 2021-07-16 05:16:57.419565+02:00 \n",
|
||
"1 67133 NaN 65652 2023-09-27 02:21:36.573001+02:00 \n",
|
||
"2 1874 NaN 1826 2021-07-16 00:02:17.390274+02:00 \n",
|
||
"3 5904 NaN 5837 2021-07-16 05:16:57.420302+02:00 \n",
|
||
"4 4165 NaN 4106 2021-07-16 03:53:05.929713+02:00 \n",
|
||
"\n",
|
||
" updated_at start_date_time open \\\n",
|
||
"0 2021-07-16 05:16:57.419565+02:00 2019-08-24 18:00:00+02:00 True \n",
|
||
"1 2023-09-27 02:21:36.573001+02:00 2023-10-04 10:30:00+02:00 True \n",
|
||
"2 2021-07-16 00:02:17.390274+02:00 2019-09-14 18:00:00+02:00 True \n",
|
||
"3 2021-07-16 05:16:57.420302+02:00 2019-09-01 17:05:00+02:00 True \n",
|
||
"4 2021-07-16 03:53:05.929713+02:00 2018-10-14 14:00:00+02:00 True \n",
|
||
"\n",
|
||
" satisfaction end_date_time name is_display \\\n",
|
||
"0 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"1 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"2 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"3 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"4 NaN 1901-01-01 00:09:21+00:09 NaN True \n",
|
||
"\n",
|
||
" representation_type_id expected_filling max_filling extra_field \\\n",
|
||
"0 NaN NaN NaN NaN \n",
|
||
"1 NaN NaN NaN NaN \n",
|
||
"2 NaN NaN NaN NaN \n",
|
||
"3 NaN NaN NaN NaN \n",
|
||
"4 NaN NaN NaN NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 8009c34cae4e79e3781f16f3ceeab244 \n",
|
||
"1 4e9d3fc8d1f7bf563dc586548fe6390e \n",
|
||
"2 19f666370c1fc781dff638c20ae04c8a \n",
|
||
"3 4221acd3f49179f5d0b292c15d1ab8e4 \n",
|
||
"4 733104286519c0614b2d45470eb180a1 "
|
||
]
|
||
},
|
||
"execution_count": 218,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"representations = display_databases(\"8representations.csv\")\n",
|
||
"representations.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 219,
|
||
"id": "0f2c7ea3-6964-48fd-9411-17547b2c3a3f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#representation_type = display_databases(\"8representation_types.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a9b02406-2a69-4431-8d49-3c6bd6a5e1c7",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze type of events"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1d554266-282c-4f64-9a0f-ddcf591ec912",
|
||
"metadata": {},
|
||
"source": [
|
||
"That is, the files events.csv, event_types.csv, seasons.csv and facilities.csv."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 220,
|
||
"id": "cba22ee2-338d-4ce1-a1e8-829a11a94bcf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8events.csv\n",
|
||
"Shape : (922, 12)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>41542</td>\n",
|
||
" <td>2022-10-29 02:54:32.756920+02:00</td>\n",
|
||
" <td>2022-10-29 02:57:35.511792+02:00</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>match lou feminin - lons</td>\n",
|
||
" <td>5588</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5588</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>40cc5a346b1af4ee7108ac28b144fb77</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>21068</td>\n",
|
||
" <td>2021-12-17 03:43:53.166446+01:00</td>\n",
|
||
" <td>2021-12-17 03:46:40.346096+01:00</td>\n",
|
||
" <td>51</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>repas brasserie lou-racing</td>\n",
|
||
" <td>2310</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2310</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>500b670b79aa592ecb06f4957800a752</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>59812</td>\n",
|
||
" <td>2023-05-26 01:45:54.321665+02:00</td>\n",
|
||
" <td>2023-05-26 01:46:01.571397+02:00</td>\n",
|
||
" <td>1501</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>parking match 2</td>\n",
|
||
" <td>10185</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>10185</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>d5f62ed879867b8b51ed7b85f1fc3ab0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3424</td>\n",
|
||
" <td>2021-07-16 03:13:06.988358+02:00</td>\n",
|
||
" <td>2021-07-16 05:33:31.321933+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>rugby + hockey sur glace</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>822b47176c355a647aa2dbdf8dfbc594</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>21379</td>\n",
|
||
" <td>2021-12-23 02:37:22.948114+01:00</td>\n",
|
||
" <td>2021-12-23 02:38:20.726329+01:00</td>\n",
|
||
" <td>51</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>bloc des etudiants lou-racing</td>\n",
|
||
" <td>2562</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2562</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17b91f19c71ff6287ffc1f44af952576</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id created_at updated_at \\\n",
|
||
"0 41542 2022-10-29 02:54:32.756920+02:00 2022-10-29 02:57:35.511792+02:00 \n",
|
||
"1 21068 2021-12-17 03:43:53.166446+01:00 2021-12-17 03:46:40.346096+01:00 \n",
|
||
"2 59812 2023-05-26 01:45:54.321665+02:00 2023-05-26 01:46:01.571397+02:00 \n",
|
||
"3 3424 2021-07-16 03:13:06.988358+02:00 2021-07-16 05:33:31.321933+02:00 \n",
|
||
"4 21379 2021-12-23 02:37:22.948114+01:00 2021-12-23 02:38:20.726329+01:00 \n",
|
||
"\n",
|
||
" season_id facility_id name event_type_id \\\n",
|
||
"0 52 1 match lou feminin - lons 5588 \n",
|
||
"1 51 1 repas brasserie lou-racing 2310 \n",
|
||
"2 1501 2 parking match 2 10185 \n",
|
||
"3 1 1 rugby + hockey sur glace 5 \n",
|
||
"4 51 1 bloc des etudiants lou-racing 2562 \n",
|
||
"\n",
|
||
" manual_added is_display event_type_key_id facility_key_id \\\n",
|
||
"0 False True 5588 1 \n",
|
||
"1 False True 2310 1 \n",
|
||
"2 False True 10185 2 \n",
|
||
"3 False True 5 1 \n",
|
||
"4 False True 2562 1 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 40cc5a346b1af4ee7108ac28b144fb77 \n",
|
||
"1 500b670b79aa592ecb06f4957800a752 \n",
|
||
"2 d5f62ed879867b8b51ed7b85f1fc3ab0 \n",
|
||
"3 822b47176c355a647aa2dbdf8dfbc594 \n",
|
||
"4 17b91f19c71ff6287ffc1f44af952576 "
|
||
]
|
||
},
|
||
"execution_count": 220,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"events = display_databases(\"8events.csv\")\n",
|
||
"events.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 221,
|
||
"id": "3db00b9d-2187-4cb6-980d-8ac6ab9eb460",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8event_types.csv\n",
|
||
"Shape : (73, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>standard</td>\n",
|
||
" <td>2021-06-29 13:52:10.434850+02:00</td>\n",
|
||
" <td>2021-06-29 13:52:10.434850+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c00f0c4675b91fb8b918e4079a0b1bac</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>ptit lou</td>\n",
|
||
" <td>2021-06-29 21:33:13.000743+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.000743+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>dedd3579bc13b3ed7a90277247d9944b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>274</td>\n",
|
||
" <td>parking 19-20</td>\n",
|
||
" <td>2021-07-16 00:02:17.225410+02:00</td>\n",
|
||
" <td>2021-07-16 00:02:17.225410+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>0d348caeec0b66f9d4987dfbe30e1e8b</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>129</td>\n",
|
||
" <td>events 2018-2019</td>\n",
|
||
" <td>2021-06-30 01:35:18.110429+02:00</td>\n",
|
||
" <td>2021-06-30 01:35:18.110429+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>65eb39ddf8f79d28d93c2f2c53118f50</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>accreditations 2017-2018</td>\n",
|
||
" <td>2021-06-29 21:33:12.999510+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:12.999510+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>732cfdcf2065fa0005faf42793ddd76c</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 1 standard 2021-06-29 13:52:10.434850+02:00 \n",
|
||
"1 11 ptit lou 2021-06-29 21:33:13.000743+02:00 \n",
|
||
"2 274 parking 19-20 2021-07-16 00:02:17.225410+02:00 \n",
|
||
"3 129 events 2018-2019 2021-06-30 01:35:18.110429+02:00 \n",
|
||
"4 10 accreditations 2017-2018 2021-06-29 21:33:12.999510+02:00 \n",
|
||
"\n",
|
||
" updated_at fidelity_delay \\\n",
|
||
"0 2021-06-29 13:52:10.434850+02:00 36 \n",
|
||
"1 2021-06-29 21:33:13.000743+02:00 36 \n",
|
||
"2 2021-07-16 00:02:17.225410+02:00 36 \n",
|
||
"3 2021-06-30 01:35:18.110429+02:00 36 \n",
|
||
"4 2021-06-29 21:33:12.999510+02:00 36 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 c00f0c4675b91fb8b918e4079a0b1bac \n",
|
||
"1 dedd3579bc13b3ed7a90277247d9944b \n",
|
||
"2 0d348caeec0b66f9d4987dfbe30e1e8b \n",
|
||
"3 65eb39ddf8f79d28d93c2f2c53118f50 \n",
|
||
"4 732cfdcf2065fa0005faf42793ddd76c "
|
||
]
|
||
},
|
||
"execution_count": 221,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"event_types = display_databases(\"8event_types.csv\")\n",
|
||
"event_types.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 222,
|
||
"id": "cba0ee58-6280-45fe-99b3-0be09db5922b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8seasons.csv\n",
|
||
"Shape : (16, 6)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1501</td>\n",
|
||
" <td>saison 2023-2024</td>\n",
|
||
" <td>2022-06-25 03:07:31.209270+02:00</td>\n",
|
||
" <td>2022-06-25 03:07:31.209270+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>71f5c069ce45c5e933dcc37c22507fbf</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1194</td>\n",
|
||
" <td>saison 2049-2050</td>\n",
|
||
" <td>2022-02-17 03:24:23.942691+01:00</td>\n",
|
||
" <td>2022-02-17 03:24:23.942691+01:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>44e20620bbc5926db2e295d38b606afd</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>saison 2016-2017</td>\n",
|
||
" <td>2021-06-29 21:33:00.702563+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:00.702563+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>f9cf989d4f49300220df67ef93aa2294</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>47</td>\n",
|
||
" <td>saison 2018-2019</td>\n",
|
||
" <td>2021-06-30 01:35:15.156097+02:00</td>\n",
|
||
" <td>2021-06-30 01:35:15.156097+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>eec50c35fbf8593b364ced287335d90c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>100</td>\n",
|
||
" <td>saison 2010-2011</td>\n",
|
||
" <td>2021-07-16 00:23:27.607648+02:00</td>\n",
|
||
" <td>2021-07-16 00:23:27.607648+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7ccc51049a85e0df9b80662e45b6ddb8</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 1501 saison 2023-2024 2022-06-25 03:07:31.209270+02:00 \n",
|
||
"1 1194 saison 2049-2050 2022-02-17 03:24:23.942691+01:00 \n",
|
||
"2 2 saison 2016-2017 2021-06-29 21:33:00.702563+02:00 \n",
|
||
"3 47 saison 2018-2019 2021-06-30 01:35:15.156097+02:00 \n",
|
||
"4 100 saison 2010-2011 2021-07-16 00:23:27.607648+02:00 \n",
|
||
"\n",
|
||
" updated_at start_date_time \\\n",
|
||
"0 2022-06-25 03:07:31.209270+02:00 NaN \n",
|
||
"1 2022-02-17 03:24:23.942691+01:00 NaN \n",
|
||
"2 2021-06-29 21:33:00.702563+02:00 NaN \n",
|
||
"3 2021-06-30 01:35:15.156097+02:00 NaN \n",
|
||
"4 2021-07-16 00:23:27.607648+02:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 71f5c069ce45c5e933dcc37c22507fbf \n",
|
||
"1 44e20620bbc5926db2e295d38b606afd \n",
|
||
"2 f9cf989d4f49300220df67ef93aa2294 \n",
|
||
"3 eec50c35fbf8593b364ced287335d90c \n",
|
||
"4 7ccc51049a85e0df9b80662e45b6ddb8 "
|
||
]
|
||
},
|
||
"execution_count": 222,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"seasons = display_databases(\"8seasons.csv\")\n",
|
||
"seasons.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 223,
|
||
"id": "6fa82fd7-d6d3-4857-af24-ea573b1129d0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/8/8facilities.csv\n",
|
||
"Shape : (5, 7)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>74</td>\n",
|
||
" <td>plan pour campagne d'abo 2011/2012</td>\n",
|
||
" <td>2021-07-16 00:23:30.337698+02:00</td>\n",
|
||
" <td>2021-07-16 00:23:30.337698+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2e1d25d5f7e46e23c734fe0e4951390e</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>accreditation</td>\n",
|
||
" <td>2021-06-29 21:33:13.018552+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.018552+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>da37a04e592cbd344142730ce05a6887</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>organisation match exterieur</td>\n",
|
||
" <td>2021-06-29 21:33:13.019878+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.019878+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8f9ee8c2e954585f7c68096d7f1cf4f1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>parking matmut stadium</td>\n",
|
||
" <td>2021-06-29 21:33:13.017165+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.017165+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aeab282982ea738674dbf5c3763a0be0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>matmut stadium</td>\n",
|
||
" <td>2021-06-29 21:33:13.004560+02:00</td>\n",
|
||
" <td>2021-06-29 21:33:13.004560+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>89feffd283ebdabdc3b81fb62ea4f6f0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 74 plan pour campagne d'abo 2011/2012 2021-07-16 00:23:30.337698+02:00 \n",
|
||
"1 3 accreditation 2021-06-29 21:33:13.018552+02:00 \n",
|
||
"2 4 organisation match exterieur 2021-06-29 21:33:13.019878+02:00 \n",
|
||
"3 2 parking matmut stadium 2021-06-29 21:33:13.017165+02:00 \n",
|
||
"4 1 matmut stadium 2021-06-29 21:33:13.004560+02:00 \n",
|
||
"\n",
|
||
" updated_at street_id fixed_capacity \\\n",
|
||
"0 2021-07-16 00:23:30.337698+02:00 1 NaN \n",
|
||
"1 2021-06-29 21:33:13.018552+02:00 1 NaN \n",
|
||
"2 2021-06-29 21:33:13.019878+02:00 1 NaN \n",
|
||
"3 2021-06-29 21:33:13.017165+02:00 1 NaN \n",
|
||
"4 2021-06-29 21:33:13.004560+02:00 1 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 2e1d25d5f7e46e23c734fe0e4951390e \n",
|
||
"1 da37a04e592cbd344142730ce05a6887 \n",
|
||
"2 8f9ee8c2e954585f7c68096d7f1cf4f1 \n",
|
||
"3 aeab282982ea738674dbf5c3763a0be0 \n",
|
||
"4 89feffd283ebdabdc3b81fb62ea4f6f0 "
|
||
]
|
||
},
|
||
"execution_count": 223,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"facilities = display_databases(\"8facilities.csv\")\n",
|
||
"facilities.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c7467d41-0ded-465d-bb08-15be914a166b",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Analyze auxiliary databases"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "17e9e334-0ae4-48d8-bed5-b50b4af49d5b",
|
||
"metadata": {},
|
||
"source": [
|
||
"That is, contributions.csv, contribution_sites.csv, currencies.csv, countries.csv and type_ofs.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d3ec1040-48b2-40bb-8947-920ddb4589f3",
|
||
"metadata": {},
|
||
"source": [
|
||
"## II. Identify Common Datasets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ec528a8a-df38-48e2-a1be-4a1459a80a1e",
|
||
"metadata": {},
|
||
"source": [
|
||
"From the analysis of company 8, we notice that some databases do not exist. Therefore, in order to construct a uniform database for all companies, we should first identify the databases that are common to all companies"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 224,
|
||
"id": "c240b811-48a6-4501-9e70-bc51d69e3ac4",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"## We first construct a dictionary listing the datasets available for each company\n",
|
||
"\n",
|
||
"companies = fs.ls(BUCKET)\n",
|
||
"companies_database = {}\n",
|
||
"\n",
|
||
"for company in companies:\n",
|
||
" companies_database[company.split('/')[-1]] = [file.split('/')[-1].replace(company.split('/')[-1], '') for file in fs.ls(company)] \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 225,
|
||
"id": "54057367-9df9-42f4-aa07-bf524bb76462",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of databases : 30\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Then we take the dataset list of the company with the most datasets as the reference list of all databases\n",
|
||
"\n",
|
||
"all_database = companies_database[max(companies_database, key=lambda x: len(companies_database[x]))]\n",
|
||
"print(\"Number of databases : \",len(all_database))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 226,
|
||
"id": "63914e20-9efc-4088-877b-edab5f225d00",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"30\n",
|
||
"23\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"## We then build the set of databases common to all companies\n",
|
||
"\n",
|
||
"data_in_common = set(all_database)\n",
|
||
"\n",
|
||
"print(len(data_in_common))\n",
|
||
"\n",
|
||
"for key in companies_database:\n",
|
||
" diff_database = data_in_common.symmetric_difference(companies_database[key])\n",
|
||
" data_in_common = data_in_common - diff_database\n",
|
||
"\n",
|
||
"print(len(data_in_common))\n",
|
||
" "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "676d8536-7d8c-4075-a357-b8d06e501ca8",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Create Universal database"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7e460fbe-5067-4998-a1a8-9e3d07401750",
|
||
"metadata": {},
|
||
"source": [
|
||
"We will first create a procedure to clean the datasets of a company and then merge them. Hence, we will be able to replicate this procedure for all companies and create a universal database.\n",
|
||
"\n",
|
||
"Let's first create our procedure for company 1 and the datasets belonging to the products theme"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 227,
|
||
"id": "590a132a-4f57-4ea3-a282-2ef913e4b753",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"directory_path = '1'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 228,
|
||
"id": "0fbebfb7-a827-46b1-890b-86c9def7cdbb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"theme_products = [\"products.csv\" ,\"categories.csv\", \"type_of_categories.csv\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 229,
|
||
"id": "b8aa5f8f-845e-4ee5-b80d-38b7061a94a2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def remove_horodates(df):\n",
|
||
" \"\"\"\n",
|
||
"    this function removes the timestamp columns created_at and updated_at\n",
|
||
" \"\"\"\n",
|
||
" df = df.drop(columns = [\"created_at\", \"updated_at\"])\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 230,
|
||
"id": "2c478213-09ae-44ef-8c7c-125bcb571642",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def order_columns_id(df):\n",
|
||
" \"\"\"\n",
|
||
"    this function puts all columns whose name contains 'id' at the beginning to make the dataset easier to read\n",
|
||
" \"\"\"\n",
|
||
" substring = 'id'\n",
|
||
" id_columns = [col for col in df.columns if substring in col]\n",
|
||
" remaining_col = [col for col in df.columns if substring not in col]\n",
|
||
" new_order = id_columns + remaining_col\n",
|
||
" return df[new_order]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 231,
|
||
"id": "327e44b0-eb99-4022-b4ca-79548072f0f0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def percent_na(df):\n",
|
||
" \"\"\"\n",
|
||
" this function returns the percentage of na for each column\n",
|
||
" \"\"\"\n",
|
||
" percent_missing = df.isna().sum() * 100 / len(df)\n",
|
||
" return percent_missing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 232,
|
||
"id": "10926def-267f-4e86-b2c9-72e27ff9a9df",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def process_df(df):\n",
|
||
" df = remove_horodates(df)\n",
|
||
" print(\"Number of columns : \", len(df.columns))\n",
|
||
" df = order_columns_id(df)\n",
|
||
" print(\"Columns : \", df.columns)\n",
|
||
" print(\"Percent of NA for each column : \", percent_na(df))\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "98ac02cb-5295-47ca-99c6-99e622c5f388",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of products.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 233,
|
||
"id": "862a7658-0602-4d94-bb58-d23774c00d32",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1products.csv\n",
|
||
"Shape : (94803, 14)\n",
|
||
"Number of columns : 14\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
|
||
" <td>2020-09-03 14:09:43.119798+02:00</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>35c88f2db8a63d7474e46eb8ca9260e7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:22.711773+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8a179671ab198e570e6a104c4451379f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
|
||
" <td>2020-09-03 14:46:33.589030+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>ee83779ce29e67ad251e40234b426d6a</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
|
||
" <td>2022-01-28 19:29:23.525722+01:00</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>d865383579314b791aa4bcf3fb418f17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
|
||
" <td>2020-09-03 13:29:30.773089+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>f1c4689bc47dee6f60b56d74b593dd46</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id amount is_full_price representation_id pricing_formula_id \\\n",
|
||
"0 10682 9.0 False 914 114 \n",
|
||
"1 478 9.5 False 273 131 \n",
|
||
"2 20873 11.5 False 275 137 \n",
|
||
"3 157142 8.0 False 82519 9 \n",
|
||
"4 1341 8.5 False 9 93 \n",
|
||
"\n",
|
||
" created_at updated_at \\\n",
|
||
"0 2020-09-03 14:09:43.119798+02:00 2020-09-03 14:09:43.119798+02:00 \n",
|
||
"1 2020-09-03 13:21:22.711773+02:00 2020-09-03 13:21:22.711773+02:00 \n",
|
||
"2 2020-09-03 14:46:33.589030+02:00 2020-09-03 14:46:33.589030+02:00 \n",
|
||
"3 2022-01-28 19:29:23.525722+01:00 2022-01-28 19:29:23.525722+01:00 \n",
|
||
"4 2020-09-03 13:29:30.773089+02:00 2020-09-03 13:29:30.773089+02:00 \n",
|
||
"\n",
|
||
" category_id apply_price products_group_id product_pack_id extra_field \\\n",
|
||
"0 41 0.0 10655 1 NaN \n",
|
||
"1 1 0.0 471 1 NaN \n",
|
||
"2 1 0.0 20825 1 NaN \n",
|
||
"3 5 0.0 156773 1 NaN \n",
|
||
"4 1 0.0 1175 1 NaN \n",
|
||
"\n",
|
||
" amount_consumption identifier \n",
|
||
"0 NaN 35c88f2db8a63d7474e46eb8ca9260e7 \n",
|
||
"1 NaN 8a179671ab198e570e6a104c4451379f \n",
|
||
"2 NaN ee83779ce29e67ad251e40234b426d6a \n",
|
||
"3 NaN d865383579314b791aa4bcf3fb418f17 \n",
|
||
"4 NaN f1c4689bc47dee6f60b56d74b593dd46 "
|
||
]
|
||
},
|
||
"execution_count": 233,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products = display_databases(\"1products.csv\")\n",
|
||
"print(\"Number of columns : \", len(products.columns))\n",
|
||
"products.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 234,
|
||
"id": "f0db8c51-2792-4d49-9b1a-d98ce0d9ea28",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 12\n",
|
||
"Columns : Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n",
|
||
" 'products_group_id', 'product_pack_id', 'identifier', 'amount',\n",
|
||
" 'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>35c88f2db8a63d7474e46eb8ca9260e7</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8a179671ab198e570e6a104c4451379f</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>ee83779ce29e67ad251e40234b426d6a</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>d865383579314b791aa4bcf3fb418f17</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>f1c4689bc47dee6f60b56d74b593dd46</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id identifier \\\n",
|
||
"0 10655 1 35c88f2db8a63d7474e46eb8ca9260e7 \n",
|
||
"1 471 1 8a179671ab198e570e6a104c4451379f \n",
|
||
"2 20825 1 ee83779ce29e67ad251e40234b426d6a \n",
|
||
"3 156773 1 d865383579314b791aa4bcf3fb418f17 \n",
|
||
"4 1175 1 f1c4689bc47dee6f60b56d74b593dd46 \n",
|
||
"\n",
|
||
" amount is_full_price apply_price extra_field amount_consumption \n",
|
||
"0 9.0 False 0.0 NaN NaN \n",
|
||
"1 9.5 False 0.0 NaN NaN \n",
|
||
"2 11.5 False 0.0 NaN NaN \n",
|
||
"3 8.0 False 0.0 NaN NaN \n",
|
||
"4 8.5 False 0.0 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 234,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products = remove_horodates(products)\n",
|
||
"print(\"Number of columns : \", len(products.columns))\n",
|
||
"products = order_columns_id(products)\n",
|
||
"print(\"Columns : \", products.columns)\n",
|
||
"products.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 235,
|
||
"id": "a383474f-7da9-422c-bb69-3f0cc0b7053f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"id int64\n",
|
||
"representation_id int64\n",
|
||
"pricing_formula_id int64\n",
|
||
"category_id int64\n",
|
||
"products_group_id int64\n",
|
||
"product_pack_id int64\n",
|
||
"identifier object\n",
|
||
"amount float64\n",
|
||
"is_full_price bool\n",
|
||
"apply_price float64\n",
|
||
"extra_field float64\n",
|
||
"amount_consumption float64\n",
|
||
"dtype: object\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(products.dtypes)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 236,
|
||
"id": "460749ac-aa26-4216-8667-518546f72f72",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"id 0.0\n",
|
||
"representation_id 0.0\n",
|
||
"pricing_formula_id 0.0\n",
|
||
"category_id 0.0\n",
|
||
"products_group_id 0.0\n",
|
||
"product_pack_id 0.0\n",
|
||
"identifier 0.0\n",
|
||
"amount 0.0\n",
|
||
"is_full_price 0.0\n",
|
||
"apply_price 0.0\n",
|
||
"extra_field 100.0\n",
|
||
"amount_consumption 100.0\n",
|
||
"dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"percent_missing = products.isna().sum() * 100 / len(products)\n",
|
||
"print(percent_missing)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ebcb48ab-adad-42e5-b5d7-7275771cd200",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of categories.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 237,
|
||
"id": "3efce2b6-2d2f-4da9-98ed-1aae17da624c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1categories.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 238,
|
||
"id": "38aa39fd-58af-4fb8-98f2-4269dbaf35de",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1categories.csv\n",
|
||
"Shape : (27, 7)\n",
|
||
"Number of columns : 7\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>quota</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>en nb entrées gr</td>\n",
|
||
" <td>2020-09-03 13:21:20.019202+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:20.019202+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>849ab2791a14f5fc2bb4d87ab2b78bf6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>16</td>\n",
|
||
" <td>indiv activité enfant</td>\n",
|
||
" <td>2020-09-03 13:11:23.306968+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.306968+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>425fd2f01984cc4ba030c1be98f42c33</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>indiv activité gr</td>\n",
|
||
" <td>2020-09-03 13:21:20.029901+02:00</td>\n",
|
||
" <td>2020-09-03 13:21:20.029901+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9244dd3738788db0d22a5d0afe687b69</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1108</td>\n",
|
||
" <td>groupe forfait adulte</td>\n",
|
||
" <td>2020-09-19 02:06:43.145697+02:00</td>\n",
|
||
" <td>2020-09-19 02:06:43.145697+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3edda20c877a93b5ff883827238eb711</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>groupe forfait entrées tr</td>\n",
|
||
" <td>2020-09-03 13:11:23.264997+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.264997+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>ff48df4b2dd5a14116bf4d280b31621e</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 30 en nb entrées gr 2020-09-03 13:21:20.019202+02:00 \n",
|
||
"1 16 indiv activité enfant 2020-09-03 13:11:23.306968+02:00 \n",
|
||
"2 39 indiv activité gr 2020-09-03 13:21:20.029901+02:00 \n",
|
||
"3 1108 groupe forfait adulte 2020-09-19 02:06:43.145697+02:00 \n",
|
||
"4 6 groupe forfait entrées tr 2020-09-03 13:11:23.264997+02:00 \n",
|
||
"\n",
|
||
" updated_at extra_field quota \\\n",
|
||
"0 2020-09-03 13:21:20.019202+02:00 NaN NaN \n",
|
||
"1 2020-09-03 13:11:23.306968+02:00 NaN NaN \n",
|
||
"2 2020-09-03 13:21:20.029901+02:00 NaN NaN \n",
|
||
"3 2020-09-19 02:06:43.145697+02:00 NaN NaN \n",
|
||
"4 2020-09-03 13:11:23.264997+02:00 NaN NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 849ab2791a14f5fc2bb4d87ab2b78bf6 \n",
|
||
"1 425fd2f01984cc4ba030c1be98f42c33 \n",
|
||
"2 9244dd3738788db0d22a5d0afe687b69 \n",
|
||
"3 3edda20c877a93b5ff883827238eb711 \n",
|
||
"4 ff48df4b2dd5a14116bf4d280b31621e "
|
||
]
|
||
},
|
||
"execution_count": 238,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 239,
|
||
"id": "99eb6d14-8b4b-4d55-8fc7-ddf2726096f4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 3.703704\n",
|
||
"extra_field 100.000000\n",
|
||
"quota 100.000000\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field</th>\n",
|
||
" <th>quota</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>849ab2791a14f5fc2bb4d87ab2b78bf6</td>\n",
|
||
" <td>en nb entrées gr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>16</td>\n",
|
||
" <td>425fd2f01984cc4ba030c1be98f42c33</td>\n",
|
||
" <td>indiv activité enfant</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>9244dd3738788db0d22a5d0afe687b69</td>\n",
|
||
" <td>indiv activité gr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1108</td>\n",
|
||
" <td>3edda20c877a93b5ff883827238eb711</td>\n",
|
||
" <td>groupe forfait adulte</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>ff48df4b2dd5a14116bf4d280b31621e</td>\n",
|
||
" <td>groupe forfait entrées tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id identifier name \\\n",
|
||
"0 30 849ab2791a14f5fc2bb4d87ab2b78bf6 en nb entrées gr \n",
|
||
"1 16 425fd2f01984cc4ba030c1be98f42c33 indiv activité enfant \n",
|
||
"2 39 9244dd3738788db0d22a5d0afe687b69 indiv activité gr \n",
|
||
"3 1108 3edda20c877a93b5ff883827238eb711 groupe forfait adulte \n",
|
||
"4 6 ff48df4b2dd5a14116bf4d280b31621e groupe forfait entrées tr \n",
|
||
"\n",
|
||
" extra_field quota \n",
|
||
"0 NaN NaN \n",
|
||
"1 NaN NaN \n",
|
||
"2 NaN NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 239,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 240,
|
||
"id": "c5f39cc9-dff8-452c-9a3e-9f7df81a8a19",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"extra_field float64\n",
|
||
"quota float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 240,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c4cb0b37-2262-45c0-97be-b12c503016e3",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of type_of_categories.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3b4a3af9-ed12-43ec-b17e-fd425b238265",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of representation_category_capacities.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "135966fb-aab1-48d7-bb4c-39a53ee643ca",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of representations.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b480f39f-d5c7-4ded-8f64-ea8ac31f5db5",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of events.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 241,
|
||
"id": "2d52d6da-cca5-4abd-be05-2f00fd3eca8e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1events.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 242,
|
||
"id": "6cab507d-8b11-404d-9286-5cc205228af9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1events.csv\n",
|
||
"Shape : (1232, 12)\n",
|
||
"Number of columns : 12\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>2020-09-03 13:36:42.216991+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.663219+01:00</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>c1cecd093146068fd57896e254e98170</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2023-11-04 02:50:34.602462+01:00</td>\n",
|
||
" <td>2023-11-04 02:52:26.138154+01:00</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>f510a6710878d7aca36e71c54abab525</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>2020-09-03 13:29:27.944002+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.652026+01:00</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>21177fa9acad1ae2b1f595690fb853d3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>2021-07-31 11:16:42.575583+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.663219+01:00</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>962601f1eb153d45d49437f8fe839f7f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>2021-08-17 13:40:34.111923+02:00</td>\n",
|
||
" <td>2021-11-02 15:06:40.663219+01:00</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>bfa22f5a2364a2dacfc45cca1c8d3215</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id created_at updated_at \\\n",
|
||
"0 192 2020-09-03 13:36:42.216991+02:00 2021-11-02 15:06:40.663219+01:00 \n",
|
||
"1 30329 2023-11-04 02:50:34.602462+01:00 2023-11-04 02:52:26.138154+01:00 \n",
|
||
"2 161 2020-09-03 13:29:27.944002+02:00 2021-11-02 15:06:40.652026+01:00 \n",
|
||
"3 5957 2021-07-31 11:16:42.575583+02:00 2021-11-02 15:06:40.663219+01:00 \n",
|
||
"4 8337 2021-08-17 13:40:34.111923+02:00 2021-11-02 15:06:40.663219+01:00 \n",
|
||
"\n",
|
||
" season_id facility_id name \\\n",
|
||
"0 16 1 frontières \n",
|
||
"1 2767 1 visite guidée une autre histoire du monde (1h00) \n",
|
||
"2 16 1 visite contée les chercheurs d'or indiv \n",
|
||
"3 582 1 we dreamt of utopia and we woke up screaming. \n",
|
||
"4 582 1 jeff koons épisodes 4 \n",
|
||
"\n",
|
||
" event_type_id manual_added is_display event_type_key_id \\\n",
|
||
"0 4 False True 4 \n",
|
||
"1 5 False True 5 \n",
|
||
"2 2 False True 2 \n",
|
||
"3 4 False True 4 \n",
|
||
"4 4 False True 4 \n",
|
||
"\n",
|
||
" facility_key_id identifier \n",
|
||
"0 1 c1cecd093146068fd57896e254e98170 \n",
|
||
"1 1 f510a6710878d7aca36e71c54abab525 \n",
|
||
"2 1 21177fa9acad1ae2b1f595690fb853d3 \n",
|
||
"3 1 962601f1eb153d45d49437f8fe839f7f \n",
|
||
"4 1 bfa22f5a2364a2dacfc45cca1c8d3215 "
|
||
]
|
||
},
|
||
"execution_count": 242,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 243,
|
||
"id": "9fe57873-8108-44c9-b8a5-f58d3cbb6d17",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 10\n",
|
||
"Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
|
||
" 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
|
||
" dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"season_id 0.000000\n",
|
||
"facility_id 0.000000\n",
|
||
"event_type_id 0.000000\n",
|
||
"event_type_key_id 0.000000\n",
|
||
"facility_key_id 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 0.974026\n",
|
||
"manual_added 0.000000\n",
|
||
"is_display 0.000000\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>c1cecd093146068fd57896e254e98170</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>f510a6710878d7aca36e71c54abab525</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>21177fa9acad1ae2b1f595690fb853d3</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>962601f1eb153d45d49437f8fe839f7f</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>bfa22f5a2364a2dacfc45cca1c8d3215</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id identifier \\\n",
|
||
"0 1 c1cecd093146068fd57896e254e98170 \n",
|
||
"1 1 f510a6710878d7aca36e71c54abab525 \n",
|
||
"2 1 21177fa9acad1ae2b1f595690fb853d3 \n",
|
||
"3 1 962601f1eb153d45d49437f8fe839f7f \n",
|
||
"4 1 bfa22f5a2364a2dacfc45cca1c8d3215 \n",
|
||
"\n",
|
||
" name manual_added is_display \n",
|
||
"0 frontières False True \n",
|
||
"1 visite guidée une autre histoire du monde (1h00) False True \n",
|
||
"2 visite contée les chercheurs d'or indiv False True \n",
|
||
"3 we dreamt of utopia and we woke up screaming. False True \n",
|
||
"4 jeff koons épisodes 4 False True "
|
||
]
|
||
},
|
||
"execution_count": 243,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 244,
|
||
"id": "7fd9e5bd-baac-4b3b-9ffb-5a9baa18399b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"season_id int64\n",
|
||
"facility_id int64\n",
|
||
"event_type_id int64\n",
|
||
"event_type_key_id int64\n",
|
||
"facility_key_id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"manual_added bool\n",
|
||
"is_display bool\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 244,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "24186efa-5908-4b03-bf52-96415fc8bd54",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of event_types.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 245,
|
||
"id": "90ab62d4-a086-4469-961c-67eefb375388",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1event_types.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 246,
|
||
"id": "58db1751-fd56-4c28-b49e-bc8235bb0dc8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1event_types.csv\n",
|
||
"Shape : (9, 6)\n",
|
||
"Number of columns : 6\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>standard</td>\n",
|
||
" <td>2020-09-03 12:24:22.574262+02:00</td>\n",
|
||
" <td>2020-09-03 12:24:22.574262+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c00f0c4675b91fb8b918e4079a0b1bac</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>66</td>\n",
|
||
" <td>package</td>\n",
|
||
" <td>2020-09-03 14:05:04.648137+02:00</td>\n",
|
||
" <td>2020-09-03 14:05:04.648137+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>efe90a8e604a7c840e88d03a67f6b7d8</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>83</td>\n",
|
||
" <td>guide multimédias</td>\n",
|
||
" <td>2020-09-03 14:15:17.252539+02:00</td>\n",
|
||
" <td>2020-09-03 14:15:17.252539+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ee14c62b3b9f6c7dd5401685a18e4460</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>2020-09-03 13:11:23.117024+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.117024+02:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2723</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2021-12-22 09:45:47.715105+01:00</td>\n",
|
||
" <td>2021-12-22 09:45:47.715105+01:00</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 1 standard 2020-09-03 12:24:22.574262+02:00 \n",
|
||
"1 66 package 2020-09-03 14:05:04.648137+02:00 \n",
|
||
"2 83 guide multimédias 2020-09-03 14:15:17.252539+02:00 \n",
|
||
"3 3 non défini 2020-09-03 13:11:23.117024+02:00 \n",
|
||
"4 2723 NaN 2021-12-22 09:45:47.715105+01:00 \n",
|
||
"\n",
|
||
" updated_at fidelity_delay \\\n",
|
||
"0 2020-09-03 12:24:22.574262+02:00 36 \n",
|
||
"1 2020-09-03 14:05:04.648137+02:00 36 \n",
|
||
"2 2020-09-03 14:15:17.252539+02:00 36 \n",
|
||
"3 2020-09-03 13:11:23.117024+02:00 36 \n",
|
||
"4 2021-12-22 09:45:47.715105+01:00 36 \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 c00f0c4675b91fb8b918e4079a0b1bac \n",
|
||
"1 efe90a8e604a7c840e88d03a67f6b7d8 \n",
|
||
"2 ee14c62b3b9f6c7dd5401685a18e4460 \n",
|
||
"3 52ff3466787b4d538407372e5f7afe0f \n",
|
||
"4 d41d8cd98f00b204e9800998ecf8427e "
|
||
]
|
||
},
|
||
"execution_count": 246,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 247,
|
||
"id": "ac93382c-0b5f-462d-8021-0dd1e7201b8c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"fidelity_delay 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 11.111111\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>c00f0c4675b91fb8b918e4079a0b1bac</td>\n",
|
||
" <td>standard</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>66</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>efe90a8e604a7c840e88d03a67f6b7d8</td>\n",
|
||
" <td>package</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>83</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>ee14c62b3b9f6c7dd5401685a18e4460</td>\n",
|
||
" <td>guide multimédias</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2723</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>d41d8cd98f00b204e9800998ecf8427e</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id fidelity_delay identifier name\n",
|
||
"0 1 36 c00f0c4675b91fb8b918e4079a0b1bac standard\n",
|
||
"1 66 36 efe90a8e604a7c840e88d03a67f6b7d8 package\n",
|
||
"2 83 36 ee14c62b3b9f6c7dd5401685a18e4460 guide multimédias\n",
|
||
"3 3 36 52ff3466787b4d538407372e5f7afe0f non défini\n",
|
||
"4 2723 36 d41d8cd98f00b204e9800998ecf8427e NaN"
|
||
]
|
||
},
|
||
"execution_count": 247,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 248,
|
||
"id": "18cbd630-3c7d-49e1-932b-9460badf3758",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"fidelity_delay int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 248,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5847a441-31b9-4802-a5ae-90d8c6d6e153",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of seasons.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 249,
|
||
"id": "ae544dcc-f23d-4216-bb5b-597cc1b3765e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1seasons.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 250,
|
||
"id": "1ac97963-9208-4329-be41-d71a5797487f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1seasons.csv\n",
|
||
"Shape : (13, 6)\n",
|
||
"Number of columns : 6\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>943</td>\n",
|
||
" <td>2013</td>\n",
|
||
" <td>2021-07-29 08:55:33.282607+02:00</td>\n",
|
||
" <td>2021-07-29 08:55:33.282607+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8038da89e49ac5eabb489cfc6cea9fc1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>129</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>2020-09-03 15:13:08.105567+02:00</td>\n",
|
||
" <td>2020-09-03 15:13:08.105567+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>cee8d6b7ce52554fd70354e37bbf44a2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2015</td>\n",
|
||
" <td>2020-09-03 13:11:19.405037+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:19.405037+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>65d2ea03425887a717c435081cfc5dbb</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2016</td>\n",
|
||
" <td>2020-09-03 13:11:19.401001+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:19.401001+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>95192c98732387165bf8e396c0f2dad2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2017</td>\n",
|
||
" <td>2020-09-03 13:11:19.409005+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:19.409005+02:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8d8818c8e140c64c743113f563cf750f</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 943 2013 2021-07-29 08:55:33.282607+02:00 \n",
|
||
"1 129 2014 2020-09-03 15:13:08.105567+02:00 \n",
|
||
"2 3 2015 2020-09-03 13:11:19.405037+02:00 \n",
|
||
"3 2 2016 2020-09-03 13:11:19.401001+02:00 \n",
|
||
"4 4 2017 2020-09-03 13:11:19.409005+02:00 \n",
|
||
"\n",
|
||
" updated_at start_date_time \\\n",
|
||
"0 2021-07-29 08:55:33.282607+02:00 NaN \n",
|
||
"1 2020-09-03 15:13:08.105567+02:00 NaN \n",
|
||
"2 2020-09-03 13:11:19.405037+02:00 NaN \n",
|
||
"3 2020-09-03 13:11:19.401001+02:00 NaN \n",
|
||
"4 2020-09-03 13:11:19.409005+02:00 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 8038da89e49ac5eabb489cfc6cea9fc1 \n",
|
||
"1 cee8d6b7ce52554fd70354e37bbf44a2 \n",
|
||
"2 65d2ea03425887a717c435081cfc5dbb \n",
|
||
"3 95192c98732387165bf8e396c0f2dad2 \n",
|
||
"4 8d8818c8e140c64c743113f563cf750f "
|
||
]
|
||
},
|
||
"execution_count": 250,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 251,
|
||
"id": "b4593d46-105c-47dd-aa71-babd8e63e65b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.000000\n",
|
||
"identifier 0.000000\n",
|
||
"name 7.692308\n",
|
||
"start_date_time 100.000000\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>943</td>\n",
|
||
" <td>8038da89e49ac5eabb489cfc6cea9fc1</td>\n",
|
||
" <td>2013</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>129</td>\n",
|
||
" <td>cee8d6b7ce52554fd70354e37bbf44a2</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>65d2ea03425887a717c435081cfc5dbb</td>\n",
|
||
" <td>2015</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>95192c98732387165bf8e396c0f2dad2</td>\n",
|
||
" <td>2016</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8d8818c8e140c64c743113f563cf750f</td>\n",
|
||
" <td>2017</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id identifier name start_date_time\n",
|
||
"0 943 8038da89e49ac5eabb489cfc6cea9fc1 2013 NaN\n",
|
||
"1 129 cee8d6b7ce52554fd70354e37bbf44a2 2014 NaN\n",
|
||
"2 3 65d2ea03425887a717c435081cfc5dbb 2015 NaN\n",
|
||
"3 2 95192c98732387165bf8e396c0f2dad2 2016 NaN\n",
|
||
"4 4 8d8818c8e140c64c743113f563cf750f 2017 NaN"
|
||
]
|
||
},
|
||
"execution_count": 251,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 252,
|
||
"id": "5d3b096d-8e73-4514-94e5-f2dcd4d0a89c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"start_date_time float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 252,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a7b00bc7-eae6-457c-ac68-a4a55a6d1c8c",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Deep analysis of facilities.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 253,
|
||
"id": "d95ef015-d44c-4353-8761-771b910d21c9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"name_dataset = '1facilities.csv'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 254,
|
||
"id": "ef5fe794-8df7-4f27-8554-ecdc4074ac0b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1facilities.csv\n",
|
||
"Shape : (2, 7)\n",
|
||
"Number of columns : 7\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>updated_at</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>2020-09-03 13:16:35.293111+02:00</td>\n",
|
||
" <td>2020-09-03 13:16:35.293111+02:00</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>2020-09-03 13:11:23.133059+02:00</td>\n",
|
||
" <td>2020-09-03 13:11:23.133059+02:00</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>702bd76fe3dd5dbcf118a6965a946f54</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id name created_at \\\n",
|
||
"0 2 non défini 2020-09-03 13:16:35.293111+02:00 \n",
|
||
"1 1 mucem 2020-09-03 13:11:23.133059+02:00 \n",
|
||
"\n",
|
||
" updated_at street_id fixed_capacity \\\n",
|
||
"0 2020-09-03 13:16:35.293111+02:00 2 NaN \n",
|
||
"1 2020-09-03 13:11:23.133059+02:00 1 NaN \n",
|
||
"\n",
|
||
" identifier \n",
|
||
"0 52ff3466787b4d538407372e5f7afe0f \n",
|
||
"1 702bd76fe3dd5dbcf118a6965a946f54 "
|
||
]
|
||
},
|
||
"execution_count": 254,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = display_databases(name_dataset)\n",
|
||
"print(\"Number of columns : \", len(df.columns))\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 255,
|
||
"id": "e3621201-fab9-49fd-95c1-0b9d5da76e50",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n",
|
||
"Percent of NA for each column : id 0.0\n",
|
||
"street_id 0.0\n",
|
||
"identifier 0.0\n",
|
||
"name 0.0\n",
|
||
"fixed_capacity 100.0\n",
|
||
"dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>identifier</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>52ff3466787b4d538407372e5f7afe0f</td>\n",
|
||
" <td>non défini</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>702bd76fe3dd5dbcf118a6965a946f54</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id street_id identifier name fixed_capacity\n",
|
||
"0 2 2 52ff3466787b4d538407372e5f7afe0f non défini NaN\n",
|
||
"1 1 1 702bd76fe3dd5dbcf118a6965a946f54 mucem NaN"
|
||
]
|
||
},
|
||
"execution_count": 255,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = process_df(df)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 256,
|
||
"id": "1b198b92-8654-4531-a0dd-8f2e01c2e6c1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id int64\n",
|
||
"street_id int64\n",
|
||
"identifier object\n",
|
||
"name object\n",
|
||
"fixed_capacity float64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 256,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ab5c4c2d-3e04-457d-a183-e173df89b650",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Merge"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 257,
|
||
"id": "43576244-c8cf-4ca0-b056-7aea1fbf0bc7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def process_df_2(df):\n",
|
||
" df = remove_horodates(df)\n",
|
||
" print(\"Number of columns : \", len(df.columns))\n",
|
||
" df = order_columns_id(df)\n",
|
||
" print(\"Columns : \", df.columns)\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 258,
|
||
"id": "0fad097e-474c-4af7-b1e1-7d8dda3f09ea",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def load_dataset(name):\n",
|
||
" df = display_databases(name)\n",
|
||
" df = process_df_2(df)\n",
|
||
" # drop na :\n",
|
||
" #df = df.dropna(axis=1, thresh=len(df))\n",
|
||
" df = df.drop(columns = 'identifier')\n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "58ad68ff-3e20-4c4a-a122-3694670cbd28",
|
||
"metadata": {},
|
||
"source": [
|
||
"The merge between products and categories is of little use, as the relevant columns in categories are full of NA values. Is this true for all companies?"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 259,
|
||
"id": "a28e2269-4825-4ca1-886b-d425ace118ac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1products.csv\n",
|
||
"Shape : (94803, 14)\n",
|
||
"Number of columns : 12\n",
|
||
"Columns : Index(['id', 'representation_id', 'pricing_formula_id', 'category_id',\n",
|
||
" 'products_group_id', 'product_pack_id', 'identifier', 'amount',\n",
|
||
" 'is_full_price', 'apply_price', 'extra_field', 'amount_consumption'],\n",
|
||
" dtype='object')\n",
|
||
"File path : bdc2324-data/1/1categories.csv\n",
|
||
"Shape : (27, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# products + categories\n",
|
||
"\n",
|
||
"products = load_dataset(\"1products.csv\")\n",
|
||
"categories = load_dataset(\"1categories.csv\")\n",
|
||
"\n",
|
||
"products_theme = products.merge(categories, how = 'left', left_on = 'category_id', right_on = 'id', suffixes=('_products', '_categories'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 260,
|
||
"id": "4c926d7b-fa74-4cdb-aecb-79e8fd1fdcbc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_products</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>extra_field_products</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" <th>id_categories</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field_categories</th>\n",
|
||
" <th>quota</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>indiv activité tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>indiv entrées tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_products representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id amount is_full_price apply_price \\\n",
|
||
"0 10655 1 9.0 False 0.0 \n",
|
||
"1 471 1 9.5 False 0.0 \n",
|
||
"2 20825 1 11.5 False 0.0 \n",
|
||
"3 156773 1 8.0 False 0.0 \n",
|
||
"4 1175 1 8.5 False 0.0 \n",
|
||
"\n",
|
||
" extra_field_products amount_consumption id_categories name \\\n",
|
||
"0 NaN NaN 41 indiv activité tr \n",
|
||
"1 NaN NaN 1 indiv entrées tp \n",
|
||
"2 NaN NaN 1 indiv entrées tp \n",
|
||
"3 NaN NaN 5 indiv entrées tr \n",
|
||
"4 NaN NaN 1 indiv entrées tp \n",
|
||
"\n",
|
||
" extra_field_categories quota \n",
|
||
"0 NaN NaN \n",
|
||
"1 NaN NaN \n",
|
||
"2 NaN NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 260,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Preview the first five rows of the products/categories merge\n",
|
||
"products_theme.head(n = 5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 261,
|
||
"id": "4e60911d-6aad-4350-b210-f007a85b8638",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1type_of_categories.csv\n",
|
||
"Shape : (5, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'type_of_id', 'category_id', 'identifier'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>28</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" type_of_id category_id\n",
|
||
"0 12 26\n",
|
||
"1 12 15\n",
|
||
"2 12 28\n",
|
||
"3 12 1\n",
|
||
"4 12 3"
|
||
]
|
||
},
|
||
"execution_count": 261,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Load type_of_categories; its own 'id' column is dropped right after loading\n",
|
||
"\n",
|
||
"type_of_categories = load_dataset(\"1type_of_categories.csv\").drop(columns = 'id')\n",
|
||
"type_of_categories"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 262,
|
||
"id": "af9bca6c-3616-4b6d-8471-3dcdc62fc9bf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_products</th>\n",
|
||
" <th>representation_id</th>\n",
|
||
" <th>pricing_formula_id</th>\n",
|
||
" <th>category_id</th>\n",
|
||
" <th>products_group_id</th>\n",
|
||
" <th>product_pack_id</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>apply_price</th>\n",
|
||
" <th>extra_field_products</th>\n",
|
||
" <th>amount_consumption</th>\n",
|
||
" <th>id_categories</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>extra_field_categories</th>\n",
|
||
" <th>quota</th>\n",
|
||
" <th>type_of_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10682</td>\n",
|
||
" <td>914</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>10655</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>indiv activité tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>478</td>\n",
|
||
" <td>273</td>\n",
|
||
" <td>131</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>471</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>20873</td>\n",
|
||
" <td>275</td>\n",
|
||
" <td>137</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20825</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>157142</td>\n",
|
||
" <td>82519</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>156773</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>indiv entrées tr</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1341</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>93</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1175</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.5</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>indiv entrées tp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_products representation_id pricing_formula_id category_id \\\n",
|
||
"0 10682 914 114 41 \n",
|
||
"1 478 273 131 1 \n",
|
||
"2 20873 275 137 1 \n",
|
||
"3 157142 82519 9 5 \n",
|
||
"4 1341 9 93 1 \n",
|
||
"\n",
|
||
" products_group_id product_pack_id amount is_full_price apply_price \\\n",
|
||
"0 10655 1 9.0 False 0.0 \n",
|
||
"1 471 1 9.5 False 0.0 \n",
|
||
"2 20825 1 11.5 False 0.0 \n",
|
||
"3 156773 1 8.0 False 0.0 \n",
|
||
"4 1175 1 8.5 False 0.0 \n",
|
||
"\n",
|
||
" extra_field_products amount_consumption id_categories name \\\n",
|
||
"0 NaN NaN 41 indiv activité tr \n",
|
||
"1 NaN NaN 1 indiv entrées tp \n",
|
||
"2 NaN NaN 1 indiv entrées tp \n",
|
||
"3 NaN NaN 5 indiv entrées tr \n",
|
||
"4 NaN NaN 1 indiv entrées tp \n",
|
||
"\n",
|
||
" extra_field_categories quota type_of_id \n",
|
||
"0 NaN NaN NaN \n",
|
||
"1 NaN NaN 12.0 \n",
|
||
"2 NaN NaN 12.0 \n",
|
||
"3 NaN NaN NaN \n",
|
||
"4 NaN NaN 12.0 "
|
||
]
|
||
},
|
||
"execution_count": 262,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Add type_of_id to each product through the shared 'category_id' key.\n",
|
||
"# NOTE(review): this overwrites products_theme, so running the cell twice merges twice.\n",
|
||
"products_theme = pd.merge(\n",
|
||
"    products_theme,\n",
|
||
"    type_of_categories,\n",
|
||
"    how = 'left',\n",
|
||
"    on = 'category_id',\n",
|
||
")\n",
|
||
"products_theme.head(n = 5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 268,
|
||
"id": "c631a8ce-f38c-433d-ab0e-17c10cc5894c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1events.csv\n",
|
||
"Shape : (1232, 12)\n",
|
||
"Number of columns : 10\n",
|
||
"Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
|
||
" 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
|
||
" dtype='object')\n",
|
||
"File path : bdc2324-data/1/1seasons.csv\n",
|
||
"Shape : (13, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_events</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>id_seasons</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>2023</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_events season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id name_events \\\n",
|
||
"0 1 frontières \n",
|
||
"1 1 visite guidée une autre histoire du monde (1h00) \n",
|
||
"2 1 visite contée les chercheurs d'or indiv \n",
|
||
"3 1 we dreamt of utopia and we woke up screaming. \n",
|
||
"4 1 jeff koons épisodes 4 \n",
|
||
"\n",
|
||
" manual_added is_display id_seasons name_seasons start_date_time \n",
|
||
"0 False True 16 2018 NaN \n",
|
||
"1 False True 2767 2023 NaN \n",
|
||
"2 False True 16 2018 NaN \n",
|
||
"3 False True 582 2021 NaN \n",
|
||
"4 False True 582 2021 NaN "
|
||
]
|
||
},
|
||
"execution_count": 268,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Attach the season columns to each event (left join on events)\n",
|
||
"\n",
|
||
"events = load_dataset(name = \"1events.csv\")\n",
|
||
"seasons = load_dataset(name = \"1seasons.csv\")\n",
|
||
"\n",
|
||
"events_theme = pd.merge(\n",
|
||
"    events,\n",
|
||
"    seasons,\n",
|
||
"    how = 'left',\n",
|
||
"    left_on = 'season_id',\n",
|
||
"    right_on = 'id',\n",
|
||
"    suffixes = ('_events', '_seasons'),\n",
|
||
")\n",
|
||
"\n",
|
||
"events_theme.head(n = 5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 270,
|
||
"id": "83a166c4-f2e1-4af3-9e60-74fbac829bf3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1event_types.csv\n",
|
||
"Shape : (9, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
|
||
"Columns events_theme : Index(['id_events', 'season_id', 'facility_id', 'event_type_id',\n",
|
||
" 'event_type_key_id', 'facility_key_id', 'name_events', 'manual_added',\n",
|
||
" 'is_display', 'id_seasons', 'name_seasons', 'start_date_time', 'id',\n",
|
||
" 'fidelity_delay', 'name_event_types'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_events</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>id_seasons</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>2023</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_events season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id name_events \\\n",
|
||
"0 1 frontières \n",
|
||
"1 1 visite guidée une autre histoire du monde (1h00) \n",
|
||
"2 1 visite contée les chercheurs d'or indiv \n",
|
||
"3 1 we dreamt of utopia and we woke up screaming. \n",
|
||
"4 1 jeff koons épisodes 4 \n",
|
||
"\n",
|
||
" manual_added is_display id_seasons name_seasons start_date_time \\\n",
|
||
"0 False True 16 2018 NaN \n",
|
||
"1 False True 2767 2023 NaN \n",
|
||
"2 False True 16 2018 NaN \n",
|
||
"3 False True 582 2021 NaN \n",
|
||
"4 False True 582 2021 NaN \n",
|
||
"\n",
|
||
" fidelity_delay name_event_types \n",
|
||
"0 36 spectacle vivant \n",
|
||
"1 36 offre muséale groupe \n",
|
||
"2 36 offre muséale individuel \n",
|
||
"3 36 spectacle vivant \n",
|
||
"4 36 spectacle vivant "
|
||
]
|
||
},
|
||
"execution_count": 270,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Attach the event type columns to events_theme (left join on event_type_id)\n",
|
||
"\n",
|
||
"event_types = load_dataset(name = \"1event_types.csv\")\n",
|
||
"\n",
|
||
"events_theme = (\n",
|
||
"    events_theme\n",
|
||
"    .merge(\n",
|
||
"        event_types,\n",
|
||
"        how = 'left',\n",
|
||
"        left_on = 'event_type_id',\n",
|
||
"        right_on = 'id',\n",
|
||
"        suffixes = ('_events', '_event_type'),\n",
|
||
"    )\n",
|
||
"    .rename(columns = {\"name\" : \"name_event_types\"})\n",
|
||
")\n",
|
||
"# Printed before the drop, so the listing still contains the merged 'id' column\n",
|
||
"print(\"Columns events_theme : \", events_theme.columns)\n",
|
||
"events_theme = events_theme.drop(columns = 'id')\n",
|
||
"events_theme.head(n = 5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 271,
|
||
"id": "c1734e4b-ba23-4921-b80d-471057373f43",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : bdc2324-data/1/1facilities.csv\n",
|
||
"Shape : (2, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Load the facilities table\n",
|
||
"facilities = load_dataset(name = \"1facilities.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 272,
|
||
"id": "fd930794-e3ad-46f8-aa55-3b1fc3cea64d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Columns events_theme : Index(['id_events', 'season_id', 'facility_id', 'event_type_id',\n",
|
||
" 'event_type_key_id', 'facility_key_id', 'name_events', 'manual_added',\n",
|
||
" 'is_display', 'id_seasons', 'name_seasons', 'start_date_time',\n",
|
||
" 'fidelity_delay', 'name_event_types', 'id', 'street_id', 'name',\n",
|
||
" 'fixed_capacity'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_events</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>id_seasons</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>2023</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_events season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id name_events \\\n",
|
||
"0 1 frontières \n",
|
||
"1 1 visite guidée une autre histoire du monde (1h00) \n",
|
||
"2 1 visite contée les chercheurs d'or indiv \n",
|
||
"3 1 we dreamt of utopia and we woke up screaming. \n",
|
||
"4 1 jeff koons épisodes 4 \n",
|
||
"\n",
|
||
" manual_added is_display id_seasons name_seasons start_date_time \\\n",
|
||
"0 False True 16 2018 NaN \n",
|
||
"1 False True 2767 2023 NaN \n",
|
||
"2 False True 16 2018 NaN \n",
|
||
"3 False True 582 2021 NaN \n",
|
||
"4 False True 582 2021 NaN \n",
|
||
"\n",
|
||
" fidelity_delay name_event_types street_id name fixed_capacity \n",
|
||
"0 36 spectacle vivant 1 mucem NaN \n",
|
||
"1 36 offre muséale groupe 1 mucem NaN \n",
|
||
"2 36 offre muséale individuel 1 mucem NaN \n",
|
||
"3 36 spectacle vivant 1 mucem NaN \n",
|
||
"4 36 spectacle vivant 1 mucem NaN "
|
||
]
|
||
},
|
||
"execution_count": 272,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Attach the facility columns to events_theme (left join on facility_id)\n",
|
||
"events_theme = pd.merge(\n",
|
||
"    events_theme,\n",
|
||
"    facilities,\n",
|
||
"    how = 'left',\n",
|
||
"    left_on = 'facility_id',\n",
|
||
"    right_on = 'id',\n",
|
||
"    suffixes = ('_events', '_facility'),\n",
|
||
")\n",
|
||
"# Printed before the drop, so the listing still contains the merged 'id' column\n",
|
||
"print(\"Columns events_theme : \", events_theme.columns)\n",
|
||
"events_theme = events_theme.drop(columns = 'id')\n",
|
||
"events_theme.head(n = 5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 275,
|
||
"id": "6ed0ad20-8315-4112-9a85-10e5f04ef852",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def create_events_table():\n",
|
||
"    \"\"\"Build the events table enriched with seasons, event types and facilities.\n",
|
||
"\n",
|
||
"    Loads 1events.csv, 1seasons.csv, 1event_types.csv and 1facilities.csv with\n",
|
||
"    `load_dataset` and left-joins them, one after the other, onto the events.\n",
|
||
"    A progress message is printed before each of the three steps.\n",
|
||
"\n",
|
||
"    Returns the merged DataFrame; the 'id' columns brought in by event_types\n",
|
||
"    and facilities are dropped and their 'name' columns are renamed.\n",
|
||
"    \"\"\"\n",
|
||
"    # Step 1 : events <- seasons\n",
|
||
"    print(\"first merge events and seasons : \")\n",
|
||
"    events = load_dataset(\"1events.csv\")\n",
|
||
"    seasons = load_dataset(\"1seasons.csv\")\n",
|
||
"    events_theme = events.merge(\n",
|
||
"        seasons,\n",
|
||
"        how = 'left',\n",
|
||
"        left_on = 'season_id',\n",
|
||
"        right_on = 'id',\n",
|
||
"        suffixes = ('_events', '_seasons'),\n",
|
||
"    )\n",
|
||
"\n",
|
||
"    # Step 2 : events_theme <- event_types\n",
|
||
"    print(\"Secondly merge events_theme and event_types : \")\n",
|
||
"    event_types = load_dataset(\"1event_types.csv\")\n",
|
||
"    events_theme = (\n",
|
||
"        events_theme\n",
|
||
"        .merge(\n",
|
||
"            event_types,\n",
|
||
"            how = 'left',\n",
|
||
"            left_on = 'event_type_id',\n",
|
||
"            right_on = 'id',\n",
|
||
"            suffixes = ('_events', '_event_type'),\n",
|
||
"        )\n",
|
||
"        .rename(columns = {\"name\" : \"name_event_types\"})\n",
|
||
"        .drop(columns = 'id')\n",
|
||
"    )\n",
|
||
"\n",
|
||
"    # Step 3 : events_theme <- facilities\n",
|
||
"    print(\"thirdly merge events_theme and facilities : \")\n",
|
||
"    facilities = load_dataset(\"1facilities.csv\")\n",
|
||
"    # NOTE(review): \"name_facilties\" looks like a typo for \"name_facilities\";\n",
|
||
"    # kept as is because other cells may already reference this column name.\n",
|
||
"    return (\n",
|
||
"        events_theme\n",
|
||
"        .merge(\n",
|
||
"            facilities,\n",
|
||
"            how = 'left',\n",
|
||
"            left_on = 'facility_id',\n",
|
||
"            right_on = 'id',\n",
|
||
"            suffixes = ('_events', '_facility'),\n",
|
||
"        )\n",
|
||
"        .rename(columns = {\"name\" : \"name_facilties\"})\n",
|
||
"        .drop(columns = 'id')\n",
|
||
"    )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 276,
|
||
"id": "98ef0636-8c45-4a23-a62a-1fbe1544f8ce",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"first merge events and seasons : \n",
|
||
"File path : bdc2324-data/1/1events.csv\n",
|
||
"Shape : (1232, 12)\n",
|
||
"Number of columns : 10\n",
|
||
"Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n",
|
||
" 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n",
|
||
" dtype='object')\n",
|
||
"File path : bdc2324-data/1/1seasons.csv\n",
|
||
"Shape : (13, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n",
|
||
"Secondly merge events_theme and event_types : \n",
|
||
"File path : bdc2324-data/1/1event_types.csv\n",
|
||
"Shape : (9, 6)\n",
|
||
"Number of columns : 4\n",
|
||
"Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n",
|
||
"thirdly merge events_theme and facilities : \n",
|
||
"File path : bdc2324-data/1/1facilities.csv\n",
|
||
"Shape : (2, 7)\n",
|
||
"Number of columns : 5\n",
|
||
"Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id_events</th>\n",
|
||
" <th>season_id</th>\n",
|
||
" <th>facility_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>event_type_key_id</th>\n",
|
||
" <th>facility_key_id</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>manual_added</th>\n",
|
||
" <th>is_display</th>\n",
|
||
" <th>id_seasons</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>fidelity_delay</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>name_facilties</th>\n",
|
||
" <th>fixed_capacity</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>192</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>frontières</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>30329</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite guidée une autre histoire du monde (1h00)</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>2767</td>\n",
|
||
" <td>2023</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>161</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>visite contée les chercheurs d'or indiv</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>2018</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5957</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>we dreamt of utopia and we woke up screaming.</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8337</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>jeff koons épisodes 4</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>582</td>\n",
|
||
" <td>2021</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>mucem</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id_events season_id facility_id event_type_id event_type_key_id \\\n",
|
||
"0 192 16 1 4 4 \n",
|
||
"1 30329 2767 1 5 5 \n",
|
||
"2 161 16 1 2 2 \n",
|
||
"3 5957 582 1 4 4 \n",
|
||
"4 8337 582 1 4 4 \n",
|
||
"\n",
|
||
" facility_key_id name_events \\\n",
|
||
"0 1 frontières \n",
|
||
"1 1 visite guidée une autre histoire du monde (1h00) \n",
|
||
"2 1 visite contée les chercheurs d'or indiv \n",
|
||
"3 1 we dreamt of utopia and we woke up screaming. \n",
|
||
"4 1 jeff koons épisodes 4 \n",
|
||
"\n",
|
||
" manual_added is_display id_seasons name_seasons start_date_time \\\n",
|
||
"0 False True 16 2018 NaN \n",
|
||
"1 False True 2767 2023 NaN \n",
|
||
"2 False True 16 2018 NaN \n",
|
||
"3 False True 582 2021 NaN \n",
|
||
"4 False True 582 2021 NaN \n",
|
||
"\n",
|
||
" fidelity_delay name_event_types street_id name_facilties \\\n",
|
||
"0 36 spectacle vivant 1 mucem \n",
|
||
"1 36 offre muséale groupe 1 mucem \n",
|
||
"2 36 offre muséale individuel 1 mucem \n",
|
||
"3 36 spectacle vivant 1 mucem \n",
|
||
"4 36 spectacle vivant 1 mucem \n",
|
||
"\n",
|
||
" fixed_capacity \n",
|
||
"0 NaN \n",
|
||
"1 NaN \n",
|
||
"2 NaN \n",
|
||
"3 NaN \n",
|
||
"4 NaN "
|
||
]
|
||
},
|
||
"execution_count": 276,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = create_events_table()\n",
|
||
"df.head()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|