diff --git a/Notebook_AR.ipynb b/Notebook_AR.ipynb
index 3371cf3..9de5bc6 100644
--- a/Notebook_AR.ipynb
+++ b/Notebook_AR.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 5,
"id": "20eeb149-6618-4ef2-9cfd-ff062950f36c",
"metadata": {},
"outputs": [],
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 6,
"id": "30494c5e-9649-4fff-8708-617544188b20",
"metadata": {},
"outputs": [
@@ -46,7 +46,7 @@
" 'bdc2324-data/9']"
]
},
- "execution_count": 3,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -78,7 +78,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 7,
"id": "f1cce705-46e1-42de-8e93-2ee15312d288",
"metadata": {},
"outputs": [],
@@ -88,7 +88,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 8,
"id": "82d4db0e-0cd5-49af-a4d3-f17f54b1c03c",
"metadata": {},
"outputs": [
@@ -136,7 +136,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 9,
"id": "65cb38ad-52ae-4266-85d8-c47d81b00283",
"metadata": {},
"outputs": [],
@@ -715,7 +715,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 10,
"id": "590a132a-4f57-4ea3-a282-2ef913e4b753",
"metadata": {},
"outputs": [],
@@ -725,7 +725,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 11,
"id": "0fbebfb7-a827-46b1-890b-86c9def7cdbb",
"metadata": {},
"outputs": [],
@@ -735,7 +735,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 12,
"id": "b8aa5f8f-845e-4ee5-b80d-38b7061a94a2",
"metadata": {},
"outputs": [],
@@ -750,7 +750,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 13,
"id": "2c478213-09ae-44ef-8c7c-125bcb571642",
"metadata": {},
"outputs": [],
@@ -768,7 +768,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 14,
"id": "327e44b0-eb99-4022-b4ca-79548072f0f0",
"metadata": {},
"outputs": [],
@@ -781,6 +781,22 @@
" return percent_missing"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "10926def-267f-4e86-b2c9-72e27ff9a9df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def process_df(df):\n",
+ "    without_dates = remove_horodates(df)  # helper defined outside this cell; presumably drops timestamp columns - TODO confirm\n",
+ "    print(\"Number of columns : \", len(without_dates.columns))\n",
+ "    reordered = order_columns_id(without_dates)  # helper defined outside this cell; presumably puts id columns first - TODO confirm\n",
+ "    print(\"Columns : \", reordered.columns)\n",
+ "    print(\"Percent of NA for each column : \", percent_na(reordered))\n",
+ "    return reordered"
+ ]
+ },
{
"cell_type": "markdown",
"id": "98ac02cb-5295-47ca-99c6-99e622c5f388",
@@ -791,7 +807,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 15,
"id": "862a7658-0602-4d94-bb58-d23774c00d32",
"metadata": {},
"outputs": [
@@ -961,7 +977,7 @@
"4 NaN f1c4689bc47dee6f60b56d74b593dd46 "
]
},
- "execution_count": 32,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -974,7 +990,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 16,
"id": "f0db8c51-2792-4d49-9b1a-d98ce0d9ea28",
"metadata": {},
"outputs": [
@@ -1127,7 +1143,7 @@
"4 8.5 False 0.0 NaN NaN "
]
},
- "execution_count": 33,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1142,7 +1158,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 17,
"id": "a383474f-7da9-422c-bb69-3f0cc0b7053f",
"metadata": {},
"outputs": [
@@ -1172,7 +1188,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 18,
"id": "460749ac-aa26-4216-8667-518546f72f72",
"metadata": {},
"outputs": [
@@ -1200,6 +1216,343 @@
"percent_missing = products.isna().sum() * 100 / len(products)\n",
"print(percent_missing)"
]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebcb48ab-adad-42e5-b5d7-7275771cd200",
+ "metadata": {},
+ "source": [
+ "#### Deep analysis of categories.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "3efce2b6-2d2f-4da9-98ed-1aae17da624c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "name_dataset = '1categories.csv'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "38aa39fd-58af-4fb8-98f2-4269dbaf35de",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "File path : bdc2324-data/1/1categories.csv\n",
+ "Shape : (27, 7)\n",
+ "Number of columns : 7\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " extra_field | \n",
+ " quota | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 30 | \n",
+ " en nb entrées gr | \n",
+ " 2020-09-03 13:21:20.019202+02:00 | \n",
+ " 2020-09-03 13:21:20.019202+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 849ab2791a14f5fc2bb4d87ab2b78bf6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 16 | \n",
+ " indiv activité enfant | \n",
+ " 2020-09-03 13:11:23.306968+02:00 | \n",
+ " 2020-09-03 13:11:23.306968+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 425fd2f01984cc4ba030c1be98f42c33 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 39 | \n",
+ " indiv activité gr | \n",
+ " 2020-09-03 13:21:20.029901+02:00 | \n",
+ " 2020-09-03 13:21:20.029901+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 9244dd3738788db0d22a5d0afe687b69 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1108 | \n",
+ " groupe forfait adulte | \n",
+ " 2020-09-19 02:06:43.145697+02:00 | \n",
+ " 2020-09-19 02:06:43.145697+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 3edda20c877a93b5ff883827238eb711 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 6 | \n",
+ " groupe forfait entrées tr | \n",
+ " 2020-09-03 13:11:23.264997+02:00 | \n",
+ " 2020-09-03 13:11:23.264997+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ff48df4b2dd5a14116bf4d280b31621e | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name created_at \\\n",
+ "0 30 en nb entrées gr 2020-09-03 13:21:20.019202+02:00 \n",
+ "1 16 indiv activité enfant 2020-09-03 13:11:23.306968+02:00 \n",
+ "2 39 indiv activité gr 2020-09-03 13:21:20.029901+02:00 \n",
+ "3 1108 groupe forfait adulte 2020-09-19 02:06:43.145697+02:00 \n",
+ "4 6 groupe forfait entrées tr 2020-09-03 13:11:23.264997+02:00 \n",
+ "\n",
+ " updated_at extra_field quota \\\n",
+ "0 2020-09-03 13:21:20.019202+02:00 NaN NaN \n",
+ "1 2020-09-03 13:11:23.306968+02:00 NaN NaN \n",
+ "2 2020-09-03 13:21:20.029901+02:00 NaN NaN \n",
+ "3 2020-09-19 02:06:43.145697+02:00 NaN NaN \n",
+ "4 2020-09-03 13:11:23.264997+02:00 NaN NaN \n",
+ "\n",
+ " identifier \n",
+ "0 849ab2791a14f5fc2bb4d87ab2b78bf6 \n",
+ "1 425fd2f01984cc4ba030c1be98f42c33 \n",
+ "2 9244dd3738788db0d22a5d0afe687b69 \n",
+ "3 3edda20c877a93b5ff883827238eb711 \n",
+ "4 ff48df4b2dd5a14116bf4d280b31621e "
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = display_databases(name_dataset)  # helper defined outside this cell\n",
+ "print(\"Number of columns : \", df.shape[1])\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "99eb6d14-8b4b-4d55-8fc7-ddf2726096f4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of columns : 5\n",
+ "Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
+ "Percent of NA for each column : id 0.000000\n",
+ "identifier 0.000000\n",
+ "name 3.703704\n",
+ "extra_field 100.000000\n",
+ "quota 100.000000\n",
+ "dtype: float64\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " identifier | \n",
+ " name | \n",
+ " extra_field | \n",
+ " quota | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 30 | \n",
+ " 849ab2791a14f5fc2bb4d87ab2b78bf6 | \n",
+ " en nb entrées gr | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 16 | \n",
+ " 425fd2f01984cc4ba030c1be98f42c33 | \n",
+ " indiv activité enfant | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 39 | \n",
+ " 9244dd3738788db0d22a5d0afe687b69 | \n",
+ " indiv activité gr | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1108 | \n",
+ " 3edda20c877a93b5ff883827238eb711 | \n",
+ " groupe forfait adulte | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 6 | \n",
+ " ff48df4b2dd5a14116bf4d280b31621e | \n",
+ " groupe forfait entrées tr | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id identifier name \\\n",
+ "0 30 849ab2791a14f5fc2bb4d87ab2b78bf6 en nb entrées gr \n",
+ "1 16 425fd2f01984cc4ba030c1be98f42c33 indiv activité enfant \n",
+ "2 39 9244dd3738788db0d22a5d0afe687b69 indiv activité gr \n",
+ "3 1108 3edda20c877a93b5ff883827238eb711 groupe forfait adulte \n",
+ "4 6 ff48df4b2dd5a14116bf4d280b31621e groupe forfait entrées tr \n",
+ "\n",
+ " extra_field quota \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN "
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = process_df(df)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "c5f39cc9-dff8-452c-9a3e-9f7df81a8a19",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "identifier object\n",
+ "name object\n",
+ "extra_field float64\n",
+ "quota float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c4cb0b37-2262-45c0-97be-b12c503016e3",
+ "metadata": {},
+ "source": [
+ "#### Deep analysis of type_of_categories.csv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b4a3af9-ed12-43ec-b17e-fd425b238265",
+ "metadata": {},
+ "source": [
+ "#### Deep analysis of representation_category_capacities.csv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "135966fb-aab1-48d7-bb4c-39a53ee643ca",
+ "metadata": {},
+ "source": [
+ "#### Deep analysis of representations.csv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b480f39f-d5c7-4ded-8f64-ea8ac31f5db5",
+ "metadata": {},
+ "source": [
+ "#### Deep analysis of events.csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2d52d6da-cca5-4abd-be05-2f00fd3eca8e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {