Ajout analyse descriptive

This commit is contained in:
Antoine JOUBREL 2024-02-14 11:44:57 +00:00
parent c67337b10e
commit 2c4a1c6761
2 changed files with 537 additions and 61 deletions

View File

@ -772,6 +772,462 @@
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "82895dfc-e5ca-4be0-af24-93c1be8f6248",
"metadata": {},
"source": [
"### Proportion de tickets de prix 0"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "6e27dd83-f188-43a5-b595-618b4922a358",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ticket_id 0.418220\n",
"customer_id 0.418220\n",
"purchase_id 0.418220\n",
"event_type_id 0.418220\n",
"supplier_name 0.418220\n",
"purchase_date 0.418220\n",
"type_of_ticket_name 0.418220\n",
"amount 0.418220\n",
"children 0.418220\n",
"is_full_price 0.418220\n",
"name_event_types 0.418220\n",
"name_facilities 0.418220\n",
"name_categories 0.402548\n",
"name_events 0.175585\n",
"name_seasons 0.418220\n",
"dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases[purchases['amount'] == 0].count()/len(purchases)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "f663d68b-8a5c-4804-b31a-4477a03ca1e4",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>purchase_id</th>\n",
" <th>ticket_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>73518.000000</td>\n",
" <td>7.351800e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>10.096167</td>\n",
" <td>2.484660e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2367.702603</td>\n",
" <td>4.636993e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.000000</td>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000000</td>\n",
" <td>3.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>641981.000000</td>\n",
" <td>1.256574e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" purchase_id ticket_id\n",
"count 73518.000000 7.351800e+04\n",
"mean 10.096167 2.484660e+01\n",
"std 2367.702603 4.636993e+03\n",
"min 1.000000 1.000000e+00\n",
"25% 1.000000 1.000000e+00\n",
"50% 1.000000 2.000000e+00\n",
"75% 1.000000 3.000000e+00\n",
"max 641981.000000 1.256574e+06"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases.groupby('customer_id')[['purchase_id', 'ticket_id']].nunique().describe()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d1212b10-3933-450a-b001-9e2cbf308f79",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>type_of_ticket_name</th>\n",
" <th>amount</th>\n",
" <th>children</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>8.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826667</th>\n",
" <td>20662815</td>\n",
" <td>1256135</td>\n",
" <td>8007697</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 17:23:54+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826668</th>\n",
" <td>20662816</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826669</th>\n",
" <td>20662817</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826670</th>\n",
" <td>20662818</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826671</th>\n",
" <td>20662819</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1826672 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 13070859 48187 5107462 4 vente en ligne \n",
"1 13070860 48187 5107462 4 vente en ligne \n",
"2 13070861 48187 5107462 4 vente en ligne \n",
"3 13070862 48187 5107462 4 vente en ligne \n",
"4 13070863 48187 5107462 4 vente en ligne \n",
"... ... ... ... ... ... \n",
"1826667 20662815 1256135 8007697 5 vente en ligne \n",
"1826668 20662816 1256136 8007698 5 vente en ligne \n",
"1826669 20662817 1256136 8007698 5 vente en ligne \n",
"1826670 20662818 1256137 8007699 5 vente en ligne \n",
"1826671 20662819 1256137 8007699 5 vente en ligne \n",
"\n",
" purchase_date type_of_ticket_name amount \\\n",
"0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
"1 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"2 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"3 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"4 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"... ... ... ... \n",
"1826667 2023-11-08 17:23:54+00:00 Atelier 11.0 \n",
"1826668 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826669 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826670 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"1826671 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"\n",
" children is_full_price name_event_types name_facilities \\\n",
"0 pricing_formula False spectacle vivant mucem \n",
"1 pricing_formula False spectacle vivant mucem \n",
"2 pricing_formula False spectacle vivant mucem \n",
"3 pricing_formula False spectacle vivant mucem \n",
"4 pricing_formula False spectacle vivant mucem \n",
"... ... ... ... ... \n",
"1826667 pricing_formula False offre muséale groupe mucem \n",
"1826668 pricing_formula False offre muséale groupe mucem \n",
"1826669 pricing_formula False offre muséale groupe mucem \n",
"1826670 pricing_formula False offre muséale groupe mucem \n",
"1826671 pricing_formula False offre muséale groupe mucem \n",
"\n",
" name_categories name_events name_seasons \n",
"0 indiv prog enfant l'école des magiciens 2018 \n",
"1 indiv prog enfant l'école des magiciens 2018 \n",
"2 indiv prog enfant l'école des magiciens 2018 \n",
"3 indiv prog enfant l'école des magiciens 2018 \n",
"4 indiv prog enfant l'école des magiciens 2018 \n",
"... ... ... ... \n",
"1826667 indiv entrées tp NaN 2023 \n",
"1826668 indiv entrées tp NaN 2023 \n",
"1826669 indiv entrées tp NaN 2023 \n",
"1826670 indiv entrées tp NaN 2023 \n",
"1826671 indiv entrées tp NaN 2023 \n",
"\n",
"[1826672 rows x 15 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases"
]
},
{
"cell_type": "markdown",
"id": "b8a90eaa-c383-4f73-9fd6-6fbbe8eeefb8",
@ -782,7 +1238,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 17,
"id": "dc45c1cd-2a78-48a6-aa2b-6a501254b6f2",
"metadata": {},
"outputs": [
@ -1002,7 +1458,7 @@
"[5 rows x 40 columns]"
]
},
"execution_count": 15,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@ -1022,7 +1478,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 18,
"id": "89fcb455-efb4-4ad4-ab88-efd6c8a76287",
"metadata": {},
"outputs": [
@ -1043,7 +1499,7 @@
" dtype='object')"
]
},
"execution_count": 16,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -1054,7 +1510,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 19,
"id": "d7b2356a-d5fc-4547-b3ff-fded0e304fb6",
"metadata": {},
"outputs": [
@ -1178,7 +1634,7 @@
"9 0.0 "
]
},
"execution_count": 17,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@ -1197,7 +1653,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 20,
"id": "5559748f-1745-4651-a9f6-94702c7ee66f",
"metadata": {},
"outputs": [
@ -1357,7 +1813,7 @@
"max 434.000000 "
]
},
"execution_count": 18,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@ -1379,7 +1835,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 21,
"id": "4971e35d-a762-4e18-9443-fd9571bd3f1e",
"metadata": {},
"outputs": [
@ -1408,7 +1864,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 22,
"id": "bc65a711-d172-4839-b487-3047280fc3a6",
"metadata": {},
"outputs": [
@ -1438,7 +1894,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 23,
"id": "c95cc35c-abfc-47c7-9b8a-ac69bfd60dd8",
"metadata": {},
"outputs": [
@ -1466,7 +1922,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 24,
"id": "49d5fd2d-9bc1-43ac-9270-1efd73759854",
"metadata": {},
"outputs": [
@ -1511,7 +1967,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 25,
"id": "e50e2583-4b8f-478e-87ac-591dde200af8",
"metadata": {},
"outputs": [
@ -1532,7 +1988,7 @@
" dtype='object')"
]
},
"execution_count": 23,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@ -1543,7 +1999,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 26,
"id": "c724a315-9fe8-4874-be8f-a8115b17b5e2",
"metadata": {},
"outputs": [],
@ -1565,7 +2021,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 27,
"id": "58af5dcb-673e-4f4d-ad5c-f66ce1e8a22c",
"metadata": {},
"outputs": [
@ -1586,7 +2042,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 28,
"id": "cc3437f7-8b36-4398-9da6-ff15e8e4c8d7",
"metadata": {},
"outputs": [

File diff suppressed because one or more lines are too long