From 73d24ab489159d97b3c3917a09a805f3eb301d1c Mon Sep 17 00:00:00 2001 From: arevelle-ensae Date: Sat, 13 Jan 2024 10:57:51 +0000 Subject: [PATCH] dict variable --- Notebook_AR.ipynb | 1147 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 1145 insertions(+), 2 deletions(-) diff --git a/Notebook_AR.ipynb b/Notebook_AR.ipynb index 9de5bc6..3f47098 100644 --- a/Notebook_AR.ipynb +++ b/Notebook_AR.ipynb @@ -1548,11 +1548,1154 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "2d52d6da-cca5-4abd-be05-2f00fd3eca8e", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "name_dataset = '1events.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "6cab507d-8b11-404d-9286-5cc205228af9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : bdc2324-data/1/1events.csv\n", + "Shape : (1232, 12)\n", + "Number of columns : 12\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atupdated_atseason_idfacility_idnameevent_type_idmanual_addedis_displayevent_type_key_idfacility_key_ididentifier
01922020-09-03 13:36:42.216991+02:002021-11-02 15:06:40.663219+01:00161frontières4FalseTrue41c1cecd093146068fd57896e254e98170
1303292023-11-04 02:50:34.602462+01:002023-11-04 02:52:26.138154+01:0027671visite guidée une autre histoire du monde (1h00)5FalseTrue51f510a6710878d7aca36e71c54abab525
21612020-09-03 13:29:27.944002+02:002021-11-02 15:06:40.652026+01:00161visite contée les chercheurs d'or indiv2FalseTrue2121177fa9acad1ae2b1f595690fb853d3
359572021-07-31 11:16:42.575583+02:002021-11-02 15:06:40.663219+01:005821we dreamt of utopia and we woke up screaming.4FalseTrue41962601f1eb153d45d49437f8fe839f7f
483372021-08-17 13:40:34.111923+02:002021-11-02 15:06:40.663219+01:005821jeff koons épisodes 44FalseTrue41bfa22f5a2364a2dacfc45cca1c8d3215
\n", + "
" + ], + "text/plain": [ + " id created_at updated_at \\\n", + "0 192 2020-09-03 13:36:42.216991+02:00 2021-11-02 15:06:40.663219+01:00 \n", + "1 30329 2023-11-04 02:50:34.602462+01:00 2023-11-04 02:52:26.138154+01:00 \n", + "2 161 2020-09-03 13:29:27.944002+02:00 2021-11-02 15:06:40.652026+01:00 \n", + "3 5957 2021-07-31 11:16:42.575583+02:00 2021-11-02 15:06:40.663219+01:00 \n", + "4 8337 2021-08-17 13:40:34.111923+02:00 2021-11-02 15:06:40.663219+01:00 \n", + "\n", + " season_id facility_id name \\\n", + "0 16 1 frontières \n", + "1 2767 1 visite guidée une autre histoire du monde (1h00) \n", + "2 16 1 visite contée les chercheurs d'or indiv \n", + "3 582 1 we dreamt of utopia and we woke up screaming. \n", + "4 582 1 jeff koons épisodes 4 \n", + "\n", + " event_type_id manual_added is_display event_type_key_id \\\n", + "0 4 False True 4 \n", + "1 5 False True 5 \n", + "2 2 False True 2 \n", + "3 4 False True 4 \n", + "4 4 False True 4 \n", + "\n", + " facility_key_id identifier \n", + "0 1 c1cecd093146068fd57896e254e98170 \n", + "1 1 f510a6710878d7aca36e71c54abab525 \n", + "2 1 21177fa9acad1ae2b1f595690fb853d3 \n", + "3 1 962601f1eb153d45d49437f8fe839f7f \n", + "4 1 bfa22f5a2364a2dacfc45cca1c8d3215 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = display_databases(name_dataset)\n", + "print(\"Number of columns : \", len(df.columns))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "9fe57873-8108-44c9-b8a5-f58d3cbb6d17", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of columns : 10\n", + "Columns : Index(['id', 'season_id', 'facility_id', 'event_type_id', 'event_type_key_id',\n", + " 'facility_key_id', 'identifier', 'name', 'manual_added', 'is_display'],\n", + " dtype='object')\n", + "Percent of NA for each column : id 0.000000\n", + "season_id 0.000000\n", + "facility_id 0.000000\n", + "event_type_id 0.000000\n", + "event_type_key_id 0.000000\n", + "facility_key_id 0.000000\n", + "identifier 0.000000\n", + "name 0.974026\n", + "manual_added 0.000000\n", + "is_display 0.000000\n", + "dtype: float64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idseason_idfacility_idevent_type_idevent_type_key_idfacility_key_ididentifiernamemanual_addedis_display
0192161441c1cecd093146068fd57896e254e98170frontièresFalseTrue
13032927671551f510a6710878d7aca36e71c54abab525visite guidée une autre histoire du monde (1h00)FalseTrue
216116122121177fa9acad1ae2b1f595690fb853d3visite contée les chercheurs d'or indivFalseTrue
359575821441962601f1eb153d45d49437f8fe839f7fwe dreamt of utopia and we woke up screaming.FalseTrue
483375821441bfa22f5a2364a2dacfc45cca1c8d3215jeff koons épisodes 4FalseTrue
\n", + "
" + ], + "text/plain": [ + " id season_id facility_id event_type_id event_type_key_id \\\n", + "0 192 16 1 4 4 \n", + "1 30329 2767 1 5 5 \n", + "2 161 16 1 2 2 \n", + "3 5957 582 1 4 4 \n", + "4 8337 582 1 4 4 \n", + "\n", + " facility_key_id identifier \\\n", + "0 1 c1cecd093146068fd57896e254e98170 \n", + "1 1 f510a6710878d7aca36e71c54abab525 \n", + "2 1 21177fa9acad1ae2b1f595690fb853d3 \n", + "3 1 962601f1eb153d45d49437f8fe839f7f \n", + "4 1 bfa22f5a2364a2dacfc45cca1c8d3215 \n", + "\n", + " name manual_added is_display \n", + "0 frontières False True \n", + "1 visite guidée une autre histoire du monde (1h00) False True \n", + "2 visite contée les chercheurs d'or indiv False True \n", + "3 we dreamt of utopia and we woke up screaming. False True \n", + "4 jeff koons épisodes 4 False True " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = process_df(df)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7fd9e5bd-baac-4b3b-9ffb-5a9baa18399b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "season_id int64\n", + "facility_id int64\n", + "event_type_id int64\n", + "event_type_key_id int64\n", + "facility_key_id int64\n", + "identifier object\n", + "name object\n", + "manual_added bool\n", + "is_display bool\n", + "dtype: object" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "id": "24186efa-5908-4b03-bf52-96415fc8bd54", + "metadata": {}, + "source": [ + "#### Deep analysis of event_types.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "90ab62d4-a086-4469-961c-67eefb375388", + "metadata": {}, + "outputs": [], + "source": [ + "name_dataset = '1event_types.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "58db1751-fd56-4c28-b49e-bc8235bb0dc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : bdc2324-data/1/1event_types.csv\n", + "Shape : (9, 6)\n", + "Number of columns : 6\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecreated_atupdated_atfidelity_delayidentifier
01standard2020-09-03 12:24:22.574262+02:002020-09-03 12:24:22.574262+02:0036c00f0c4675b91fb8b918e4079a0b1bac
166package2020-09-03 14:05:04.648137+02:002020-09-03 14:05:04.648137+02:0036efe90a8e604a7c840e88d03a67f6b7d8
283guide multimédias2020-09-03 14:15:17.252539+02:002020-09-03 14:15:17.252539+02:0036ee14c62b3b9f6c7dd5401685a18e4460
33non défini2020-09-03 13:11:23.117024+02:002020-09-03 13:11:23.117024+02:003652ff3466787b4d538407372e5f7afe0f
42723NaN2021-12-22 09:45:47.715105+01:002021-12-22 09:45:47.715105+01:0036d41d8cd98f00b204e9800998ecf8427e
\n", + "
" + ], + "text/plain": [ + " id name created_at \\\n", + "0 1 standard 2020-09-03 12:24:22.574262+02:00 \n", + "1 66 package 2020-09-03 14:05:04.648137+02:00 \n", + "2 83 guide multimédias 2020-09-03 14:15:17.252539+02:00 \n", + "3 3 non défini 2020-09-03 13:11:23.117024+02:00 \n", + "4 2723 NaN 2021-12-22 09:45:47.715105+01:00 \n", + "\n", + " updated_at fidelity_delay \\\n", + "0 2020-09-03 12:24:22.574262+02:00 36 \n", + "1 2020-09-03 14:05:04.648137+02:00 36 \n", + "2 2020-09-03 14:15:17.252539+02:00 36 \n", + "3 2020-09-03 13:11:23.117024+02:00 36 \n", + "4 2021-12-22 09:45:47.715105+01:00 36 \n", + "\n", + " identifier \n", + "0 c00f0c4675b91fb8b918e4079a0b1bac \n", + "1 efe90a8e604a7c840e88d03a67f6b7d8 \n", + "2 ee14c62b3b9f6c7dd5401685a18e4460 \n", + "3 52ff3466787b4d538407372e5f7afe0f \n", + "4 d41d8cd98f00b204e9800998ecf8427e " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = display_databases(name_dataset)\n", + "print(\"Number of columns : \", len(df.columns))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "ac93382c-0b5f-462d-8021-0dd1e7201b8c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of columns : 4\n", + "Columns : Index(['id', 'fidelity_delay', 'identifier', 'name'], dtype='object')\n", + "Percent of NA for each column : id 0.000000\n", + "fidelity_delay 0.000000\n", + "identifier 0.000000\n", + "name 11.111111\n", + "dtype: float64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfidelity_delayidentifiername
0136c00f0c4675b91fb8b918e4079a0b1bacstandard
16636efe90a8e604a7c840e88d03a67f6b7d8package
28336ee14c62b3b9f6c7dd5401685a18e4460guide multimédias
333652ff3466787b4d538407372e5f7afe0fnon défini
4272336d41d8cd98f00b204e9800998ecf8427eNaN
\n", + "
" + ], + "text/plain": [ + " id fidelity_delay identifier name\n", + "0 1 36 c00f0c4675b91fb8b918e4079a0b1bac standard\n", + "1 66 36 efe90a8e604a7c840e88d03a67f6b7d8 package\n", + "2 83 36 ee14c62b3b9f6c7dd5401685a18e4460 guide multimédias\n", + "3 3 36 52ff3466787b4d538407372e5f7afe0f non défini\n", + "4 2723 36 d41d8cd98f00b204e9800998ecf8427e NaN" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = process_df(df)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "18cbd630-3c7d-49e1-932b-9460badf3758", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "fidelity_delay int64\n", + "identifier object\n", + "name object\n", + "dtype: object" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "id": "5847a441-31b9-4802-a5ae-90d8c6d6e153", + "metadata": {}, + "source": [ + "#### Deep analysis of seasons.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "ae544dcc-f23d-4216-bb5b-597cc1b3765e", + "metadata": {}, + "outputs": [], + "source": [ + "name_dataset = '1seasons.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "1ac97963-9208-4329-be41-d71a5797487f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : bdc2324-data/1/1seasons.csv\n", + "Shape : (13, 6)\n", + "Number of columns : 6\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecreated_atupdated_atstart_date_timeidentifier
094320132021-07-29 08:55:33.282607+02:002021-07-29 08:55:33.282607+02:00NaN8038da89e49ac5eabb489cfc6cea9fc1
112920142020-09-03 15:13:08.105567+02:002020-09-03 15:13:08.105567+02:00NaNcee8d6b7ce52554fd70354e37bbf44a2
2320152020-09-03 13:11:19.405037+02:002020-09-03 13:11:19.405037+02:00NaN65d2ea03425887a717c435081cfc5dbb
3220162020-09-03 13:11:19.401001+02:002020-09-03 13:11:19.401001+02:00NaN95192c98732387165bf8e396c0f2dad2
4420172020-09-03 13:11:19.409005+02:002020-09-03 13:11:19.409005+02:00NaN8d8818c8e140c64c743113f563cf750f
\n", + "
" + ], + "text/plain": [ + " id name created_at \\\n", + "0 943 2013 2021-07-29 08:55:33.282607+02:00 \n", + "1 129 2014 2020-09-03 15:13:08.105567+02:00 \n", + "2 3 2015 2020-09-03 13:11:19.405037+02:00 \n", + "3 2 2016 2020-09-03 13:11:19.401001+02:00 \n", + "4 4 2017 2020-09-03 13:11:19.409005+02:00 \n", + "\n", + " updated_at start_date_time \\\n", + "0 2021-07-29 08:55:33.282607+02:00 NaN \n", + "1 2020-09-03 15:13:08.105567+02:00 NaN \n", + "2 2020-09-03 13:11:19.405037+02:00 NaN \n", + "3 2020-09-03 13:11:19.401001+02:00 NaN \n", + "4 2020-09-03 13:11:19.409005+02:00 NaN \n", + "\n", + " identifier \n", + "0 8038da89e49ac5eabb489cfc6cea9fc1 \n", + "1 cee8d6b7ce52554fd70354e37bbf44a2 \n", + "2 65d2ea03425887a717c435081cfc5dbb \n", + "3 95192c98732387165bf8e396c0f2dad2 \n", + "4 8d8818c8e140c64c743113f563cf750f " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = display_databases(name_dataset)\n", + "print(\"Number of columns : \", len(df.columns))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "b4593d46-105c-47dd-aa71-babd8e63e65b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of columns : 4\n", + "Columns : Index(['id', 'identifier', 'name', 'start_date_time'], dtype='object')\n", + "Percent of NA for each column : id 0.000000\n", + "identifier 0.000000\n", + "name 7.692308\n", + "start_date_time 100.000000\n", + "dtype: float64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ididentifiernamestart_date_time
09438038da89e49ac5eabb489cfc6cea9fc12013NaN
1129cee8d6b7ce52554fd70354e37bbf44a22014NaN
2365d2ea03425887a717c435081cfc5dbb2015NaN
3295192c98732387165bf8e396c0f2dad22016NaN
448d8818c8e140c64c743113f563cf750f2017NaN
\n", + "
" + ], + "text/plain": [ + " id identifier name start_date_time\n", + "0 943 8038da89e49ac5eabb489cfc6cea9fc1 2013 NaN\n", + "1 129 cee8d6b7ce52554fd70354e37bbf44a2 2014 NaN\n", + "2 3 65d2ea03425887a717c435081cfc5dbb 2015 NaN\n", + "3 2 95192c98732387165bf8e396c0f2dad2 2016 NaN\n", + "4 4 8d8818c8e140c64c743113f563cf750f 2017 NaN" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = process_df(df)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "5d3b096d-8e73-4514-94e5-f2dcd4d0a89c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "identifier object\n", + "name object\n", + "start_date_time float64\n", + "dtype: object" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "id": "a7b00bc7-eae6-457c-ac68-a4a55a6d1c8c", + "metadata": {}, + "source": [ + "#### Deep Analysis of facilities.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "d95ef015-d44c-4353-8761-771b910d21c9", + "metadata": {}, + "outputs": [], + "source": [ + "name_dataset = '1facilities.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "ef5fe794-8df7-4f27-8554-ecdc4074ac0b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : bdc2324-data/1/1facilities.csv\n", + "Shape : (2, 7)\n", + "Number of columns : 7\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecreated_atupdated_atstreet_idfixed_capacityidentifier
02non défini2020-09-03 13:16:35.293111+02:002020-09-03 13:16:35.293111+02:002NaN52ff3466787b4d538407372e5f7afe0f
11mucem2020-09-03 13:11:23.133059+02:002020-09-03 13:11:23.133059+02:001NaN702bd76fe3dd5dbcf118a6965a946f54
\n", + "
" + ], + "text/plain": [ + " id name created_at \\\n", + "0 2 non défini 2020-09-03 13:16:35.293111+02:00 \n", + "1 1 mucem 2020-09-03 13:11:23.133059+02:00 \n", + "\n", + " updated_at street_id fixed_capacity \\\n", + "0 2020-09-03 13:16:35.293111+02:00 2 NaN \n", + "1 2020-09-03 13:11:23.133059+02:00 1 NaN \n", + "\n", + " identifier \n", + "0 52ff3466787b4d538407372e5f7afe0f \n", + "1 702bd76fe3dd5dbcf118a6965a946f54 " + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = display_databases(name_dataset)\n", + "print(\"Number of columns : \", len(df.columns))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "e3621201-fab9-49fd-95c1-0b9d5da76e50", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of columns : 5\n", + "Columns : Index(['id', 'street_id', 'identifier', 'name', 'fixed_capacity'], dtype='object')\n", + "Percent of NA for each column : id 0.0\n", + "street_id 0.0\n", + "identifier 0.0\n", + "name 0.0\n", + "fixed_capacity 100.0\n", + "dtype: float64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idstreet_ididentifiernamefixed_capacity
02252ff3466787b4d538407372e5f7afe0fnon définiNaN
111702bd76fe3dd5dbcf118a6965a946f54mucemNaN
\n", + "
" + ], + "text/plain": [ + " id street_id identifier name fixed_capacity\n", + "0 2 2 52ff3466787b4d538407372e5f7afe0f non défini NaN\n", + "1 1 1 702bd76fe3dd5dbcf118a6965a946f54 mucem NaN" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = process_df(df)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "1b198b92-8654-4531-a0dd-8f2e01c2e6c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "street_id int64\n", + "identifier object\n", + "name object\n", + "fixed_capacity float64\n", + "dtype: object" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] } ], "metadata": {