events_theme #2
|
@ -10,7 +10,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "20eeb149-6618-4ef2-9cfd-ff062950f36c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -22,7 +22,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"id": "30494c5e-9649-4fff-8708-617544188b20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -46,7 +46,7 @@
|
|||
" 'bdc2324-data/9']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -78,7 +78,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "f1cce705-46e1-42de-8e93-2ee15312d288",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -88,7 +88,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "82d4db0e-0cd5-49af-a4d3-f17f54b1c03c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -136,7 +136,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"id": "65cb38ad-52ae-4266-85d8-c47d81b00283",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -715,7 +715,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 10,
|
||||
"id": "590a132a-4f57-4ea3-a282-2ef913e4b753",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -725,7 +725,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 11,
|
||||
"id": "0fbebfb7-a827-46b1-890b-86c9def7cdbb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -735,7 +735,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 12,
|
||||
"id": "b8aa5f8f-845e-4ee5-b80d-38b7061a94a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -750,7 +750,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 13,
|
||||
"id": "2c478213-09ae-44ef-8c7c-125bcb571642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -768,7 +768,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 14,
|
||||
"id": "327e44b0-eb99-4022-b4ca-79548072f0f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -781,6 +781,22 @@
|
|||
" return percent_missing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "10926def-267f-4e86-b2c9-72e27ff9a9df",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def process_df(df):\n",
|
||||
" df = remove_horodates(df)\n",
|
||||
" print(\"Number of columns : \", len(df.columns))\n",
|
||||
" df = order_columns_id(df)\n",
|
||||
" print(\"Columns : \", df.columns)\n",
|
||||
" print(\"Percent of NA for each column : \", percent_na(df))\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98ac02cb-5295-47ca-99c6-99e622c5f388",
|
||||
|
@ -791,7 +807,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 15,
|
||||
"id": "862a7658-0602-4d94-bb58-d23774c00d32",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -961,7 +977,7 @@
|
|||
"4 NaN f1c4689bc47dee6f60b56d74b593dd46 "
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -974,7 +990,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 16,
|
||||
"id": "f0db8c51-2792-4d49-9b1a-d98ce0d9ea28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -1127,7 +1143,7 @@
|
|||
"4 8.5 False 0.0 NaN NaN "
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1142,7 +1158,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 17,
|
||||
"id": "a383474f-7da9-422c-bb69-3f0cc0b7053f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -1172,7 +1188,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 18,
|
||||
"id": "460749ac-aa26-4216-8667-518546f72f72",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -1200,6 +1216,343 @@
|
|||
"percent_missing = products.isna().sum() * 100 / len(products)\n",
|
||||
"print(percent_missing)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ebcb48ab-adad-42e5-b5d7-7275771cd200",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deep analysis of categories.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "3efce2b6-2d2f-4da9-98ed-1aae17da624c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"name_dataset = '1categories.csv'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "38aa39fd-58af-4fb8-98f2-4269dbaf35de",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File path : bdc2324-data/1/1categories.csv\n",
|
||||
"Shape : (27, 7)\n",
|
||||
"Number of columns : 7\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>updated_at</th>\n",
|
||||
" <th>extra_field</th>\n",
|
||||
" <th>quota</th>\n",
|
||||
" <th>identifier</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>en nb entrées gr</td>\n",
|
||||
" <td>2020-09-03 13:21:20.019202+02:00</td>\n",
|
||||
" <td>2020-09-03 13:21:20.019202+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>849ab2791a14f5fc2bb4d87ab2b78bf6</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>16</td>\n",
|
||||
" <td>indiv activité enfant</td>\n",
|
||||
" <td>2020-09-03 13:11:23.306968+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.306968+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>425fd2f01984cc4ba030c1be98f42c33</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>39</td>\n",
|
||||
" <td>indiv activité gr</td>\n",
|
||||
" <td>2020-09-03 13:21:20.029901+02:00</td>\n",
|
||||
" <td>2020-09-03 13:21:20.029901+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>9244dd3738788db0d22a5d0afe687b69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1108</td>\n",
|
||||
" <td>groupe forfait adulte</td>\n",
|
||||
" <td>2020-09-19 02:06:43.145697+02:00</td>\n",
|
||||
" <td>2020-09-19 02:06:43.145697+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>3edda20c877a93b5ff883827238eb711</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>groupe forfait entrées tr</td>\n",
|
||||
" <td>2020-09-03 13:11:23.264997+02:00</td>\n",
|
||||
" <td>2020-09-03 13:11:23.264997+02:00</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>ff48df4b2dd5a14116bf4d280b31621e</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" id name created_at \\\n",
|
||||
"0 30 en nb entrées gr 2020-09-03 13:21:20.019202+02:00 \n",
|
||||
"1 16 indiv activité enfant 2020-09-03 13:11:23.306968+02:00 \n",
|
||||
"2 39 indiv activité gr 2020-09-03 13:21:20.029901+02:00 \n",
|
||||
"3 1108 groupe forfait adulte 2020-09-19 02:06:43.145697+02:00 \n",
|
||||
"4 6 groupe forfait entrées tr 2020-09-03 13:11:23.264997+02:00 \n",
|
||||
"\n",
|
||||
" updated_at extra_field quota \\\n",
|
||||
"0 2020-09-03 13:21:20.019202+02:00 NaN NaN \n",
|
||||
"1 2020-09-03 13:11:23.306968+02:00 NaN NaN \n",
|
||||
"2 2020-09-03 13:21:20.029901+02:00 NaN NaN \n",
|
||||
"3 2020-09-19 02:06:43.145697+02:00 NaN NaN \n",
|
||||
"4 2020-09-03 13:11:23.264997+02:00 NaN NaN \n",
|
||||
"\n",
|
||||
" identifier \n",
|
||||
"0 849ab2791a14f5fc2bb4d87ab2b78bf6 \n",
|
||||
"1 425fd2f01984cc4ba030c1be98f42c33 \n",
|
||||
"2 9244dd3738788db0d22a5d0afe687b69 \n",
|
||||
"3 3edda20c877a93b5ff883827238eb711 \n",
|
||||
"4 ff48df4b2dd5a14116bf4d280b31621e "
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = display_databases(name_dataset)\n",
|
||||
"print(\"Number of columns : \", len(df.columns))\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "99eb6d14-8b4b-4d55-8fc7-ddf2726096f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of columns : 5\n",
|
||||
"Columns : Index(['id', 'identifier', 'name', 'extra_field', 'quota'], dtype='object')\n",
|
||||
"Percent of NA for each column : id 0.000000\n",
|
||||
"identifier 0.000000\n",
|
||||
"name 3.703704\n",
|
||||
"extra_field 100.000000\n",
|
||||
"quota 100.000000\n",
|
||||
"dtype: float64\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>identifier</th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>extra_field</th>\n",
|
||||
" <th>quota</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>849ab2791a14f5fc2bb4d87ab2b78bf6</td>\n",
|
||||
" <td>en nb entrées gr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>16</td>\n",
|
||||
" <td>425fd2f01984cc4ba030c1be98f42c33</td>\n",
|
||||
" <td>indiv activité enfant</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>39</td>\n",
|
||||
" <td>9244dd3738788db0d22a5d0afe687b69</td>\n",
|
||||
" <td>indiv activité gr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1108</td>\n",
|
||||
" <td>3edda20c877a93b5ff883827238eb711</td>\n",
|
||||
" <td>groupe forfait adulte</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>ff48df4b2dd5a14116bf4d280b31621e</td>\n",
|
||||
" <td>groupe forfait entrées tr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" id identifier name \\\n",
|
||||
"0 30 849ab2791a14f5fc2bb4d87ab2b78bf6 en nb entrées gr \n",
|
||||
"1 16 425fd2f01984cc4ba030c1be98f42c33 indiv activité enfant \n",
|
||||
"2 39 9244dd3738788db0d22a5d0afe687b69 indiv activité gr \n",
|
||||
"3 1108 3edda20c877a93b5ff883827238eb711 groupe forfait adulte \n",
|
||||
"4 6 ff48df4b2dd5a14116bf4d280b31621e groupe forfait entrées tr \n",
|
||||
"\n",
|
||||
" extra_field quota \n",
|
||||
"0 NaN NaN \n",
|
||||
"1 NaN NaN \n",
|
||||
"2 NaN NaN \n",
|
||||
"3 NaN NaN \n",
|
||||
"4 NaN NaN "
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = process_df(df)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "c5f39cc9-dff8-452c-9a3e-9f7df81a8a19",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"id int64\n",
|
||||
"identifier object\n",
|
||||
"name object\n",
|
||||
"extra_field float64\n",
|
||||
"quota float64\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c4cb0b37-2262-45c0-97be-b12c503016e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deep analysis of type_of_categories.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b4a3af9-ed12-43ec-b17e-fd425b238265",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deep analysis of representation_category_capacities.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "135966fb-aab1-48d7-bb4c-39a53ee643ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deep analysis of representations.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b480f39f-d5c7-4ded-8f64-ea8ac31f5db5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deep analysis of events.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d52d6da-cca5-4abd-be05-2f00fd3eca8e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Loading…
Reference in New Issue
Block a user