2024-02-10 19:37:38 +01:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"id": "3f41343f-7205-41d9-89dd-88039e301413",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# Statistiques descriptives"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 1,
|
2024-02-10 19:37:38 +01:00
|
|
|
"id": "abfaf341-7b35-4407-9133-d21336c04027",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import os\n",
|
|
|
|
"import s3fs\n",
|
2024-02-10 22:05:09 +01:00
|
|
|
"import re\n",
|
|
|
|
"import matplotlib.pyplot as plt"
|
2024-02-10 19:37:38 +01:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 22:05:09 +01:00
|
|
|
"execution_count": 2,
|
2024-02-10 19:37:38 +01:00
|
|
|
"id": "7fb72fa3-7940-496f-ac78-c2837f65eefa",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2024-02-10 23:19:35 +01:00
|
|
|
"# Create filesystem object\n",
|
|
|
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|
|
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
2024-02-10 19:37:38 +01:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"id": "45d5261f-4d46-49cb-8582-dd2121122b05",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# 1 - Comportement d'achat"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 22:05:09 +01:00
|
|
|
"execution_count": 3,
|
2024-02-10 19:37:38 +01:00
|
|
|
"id": "9376af51-4320-44b6-8f30-1e1234371556",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Chargement des données temporaires\n",
|
|
|
|
"BUCKET = \"projet-bdc2324-team1\"\n",
|
|
|
|
"FILE_KEY_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
|
|
|
|
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
|
|
|
|
"\n",
|
|
|
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
|
|
|
" tickets_kpi = pd.read_csv(file_in, sep=\",\")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 22:05:09 +01:00
|
|
|
"execution_count": 4,
|
2024-02-10 19:37:38 +01:00
|
|
|
"id": "1855dcca-cfce-4c54-90ae-55d9a1ab5d45",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
" <th>event_type_id</th>\n",
|
|
|
|
" <th>nb_tickets</th>\n",
|
|
|
|
" <th>total_amount</th>\n",
|
|
|
|
" <th>nb_suppliers</th>\n",
|
|
|
|
" <th>vente_internet_max</th>\n",
|
|
|
|
" <th>purchase_date_min</th>\n",
|
|
|
|
" <th>purchase_date_max</th>\n",
|
|
|
|
" <th>time_between_purchase</th>\n",
|
|
|
|
" <th>nb_tickets_internet</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>384226</td>\n",
|
|
|
|
" <td>2686540.5</td>\n",
|
|
|
|
" <td>7</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2014-12-03 14:55:37+00:00</td>\n",
|
|
|
|
" <td>2023-11-04 15:12:16+00:00</td>\n",
|
|
|
|
" <td>3258 days 00:16:39</td>\n",
|
|
|
|
" <td>51.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>453242</td>\n",
|
|
|
|
" <td>3248965.5</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2013-09-23 14:45:01+00:00</td>\n",
|
|
|
|
" <td>2023-11-03 14:11:01+00:00</td>\n",
|
|
|
|
" <td>3692 days 23:26:00</td>\n",
|
|
|
|
" <td>2988.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>5</td>\n",
|
|
|
|
" <td>201750</td>\n",
|
|
|
|
" <td>1459190.0</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
|
|
|
" <td>2023-11-08 15:59:45+00:00</td>\n",
|
|
|
|
" <td>3803 days 05:21:47</td>\n",
|
|
|
|
" <td>9.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>3</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>217356</td>\n",
|
|
|
|
" <td>1435871.5</td>\n",
|
|
|
|
" <td>5</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2017-01-01 02:20:08+00:00</td>\n",
|
|
|
|
" <td>2019-12-31 02:20:06+00:00</td>\n",
|
|
|
|
" <td>1093 days 23:59:58</td>\n",
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>4</th>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>143</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>0</td>\n",
|
|
|
|
" <td>2018-04-07 12:55:07+00:00</td>\n",
|
|
|
|
" <td>2020-03-08 12:06:43+00:00</td>\n",
|
|
|
|
" <td>700 days 23:11:36</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2024-02-10 22:05:09 +01:00
|
|
|
" customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n",
|
|
|
|
"0 1 2 384226 2686540.5 7 \n",
|
|
|
|
"1 1 4 453242 3248965.5 6 \n",
|
|
|
|
"2 1 5 201750 1459190.0 6 \n",
|
|
|
|
"3 1 6 217356 1435871.5 5 \n",
|
|
|
|
"4 2 2 143 0.0 1 \n",
|
2024-02-10 19:37:38 +01:00
|
|
|
"\n",
|
2024-02-10 22:05:09 +01:00
|
|
|
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
|
|
|
"0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n",
|
|
|
|
"1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n",
|
|
|
|
"2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n",
|
|
|
|
"3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n",
|
|
|
|
"4 0 2018-04-07 12:55:07+00:00 2020-03-08 12:06:43+00:00 \n",
|
2024-02-10 19:37:38 +01:00
|
|
|
"\n",
|
2024-02-10 22:05:09 +01:00
|
|
|
" time_between_purchase nb_tickets_internet \n",
|
|
|
|
"0 3258 days 00:16:39 51.0 \n",
|
|
|
|
"1 3692 days 23:26:00 2988.0 \n",
|
|
|
|
"2 3803 days 05:21:47 9.0 \n",
|
|
|
|
"3 1093 days 23:59:58 5.0 \n",
|
|
|
|
"4 700 days 23:11:36 0.0 "
|
2024-02-10 19:37:38 +01:00
|
|
|
]
|
|
|
|
},
|
2024-02-10 22:05:09 +01:00
|
|
|
"execution_count": 4,
|
2024-02-10 19:37:38 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"tickets_kpi.head()"
|
|
|
|
]
|
2024-02-10 22:05:09 +01:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 5,
|
2024-02-10 22:05:09 +01:00
|
|
|
"id": "0e5d3b2e-1a75-4d46-80e6-c306e9f8de84",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Index(['customer_id', 'event_type_id', 'nb_tickets', 'total_amount',\n",
|
|
|
|
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
|
|
|
|
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet'],\n",
|
|
|
|
" dtype='object')"
|
|
|
|
]
|
|
|
|
},
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 5,
|
2024-02-10 22:05:09 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"tickets_kpi.columns"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 6,
|
2024-02-10 22:05:09 +01:00
|
|
|
"id": "7667e8eb-9a1e-4216-96f4-bf987c6e30b5",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
" <th>event_type_id</th>\n",
|
|
|
|
" <th>nb_tickets</th>\n",
|
|
|
|
" <th>total_amount</th>\n",
|
|
|
|
" <th>nb_suppliers</th>\n",
|
|
|
|
" <th>vente_internet_max</th>\n",
|
|
|
|
" <th>purchase_date_min</th>\n",
|
|
|
|
" <th>purchase_date_max</th>\n",
|
|
|
|
" <th>time_between_purchase</th>\n",
|
|
|
|
" <th>nb_tickets_internet</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>453242</td>\n",
|
|
|
|
" <td>3248965.5</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2013-09-23 14:45:01+00:00</td>\n",
|
|
|
|
" <td>2023-11-03 14:11:01+00:00</td>\n",
|
|
|
|
" <td>3692 days 23:26:00</td>\n",
|
|
|
|
" <td>2988.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>384226</td>\n",
|
|
|
|
" <td>2686540.5</td>\n",
|
|
|
|
" <td>7</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2014-12-03 14:55:37+00:00</td>\n",
|
|
|
|
" <td>2023-11-04 15:12:16+00:00</td>\n",
|
|
|
|
" <td>3258 days 00:16:39</td>\n",
|
|
|
|
" <td>51.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>3</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>217356</td>\n",
|
|
|
|
" <td>1435871.5</td>\n",
|
|
|
|
" <td>5</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2017-01-01 02:20:08+00:00</td>\n",
|
|
|
|
" <td>2019-12-31 02:20:06+00:00</td>\n",
|
|
|
|
" <td>1093 days 23:59:58</td>\n",
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>5</td>\n",
|
|
|
|
" <td>201750</td>\n",
|
|
|
|
" <td>1459190.0</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
|
|
|
" <td>2023-11-08 15:59:45+00:00</td>\n",
|
|
|
|
" <td>3803 days 05:21:47</td>\n",
|
|
|
|
" <td>9.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>5032</th>\n",
|
|
|
|
" <td>6733</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>14208</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2017-01-11 15:00:54+00:00</td>\n",
|
|
|
|
" <td>2019-11-27 09:47:06+00:00</td>\n",
|
|
|
|
" <td>1049 days 18:46:12</td>\n",
|
|
|
|
" <td>13497.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>5029</th>\n",
|
|
|
|
" <td>6733</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>11656</td>\n",
|
|
|
|
" <td>471.0</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2015-09-09 13:48:38+00:00</td>\n",
|
|
|
|
" <td>2022-07-07 07:37:12+00:00</td>\n",
|
|
|
|
" <td>2492 days 17:48:34</td>\n",
|
|
|
|
" <td>9815.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>5030</th>\n",
|
|
|
|
" <td>6733</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>7440</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2021-01-06 10:05:01+00:00</td>\n",
|
|
|
|
" <td>2022-09-08 14:39:40+00:00</td>\n",
|
|
|
|
" <td>610 days 04:34:39</td>\n",
|
|
|
|
" <td>7419.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>60</th>\n",
|
|
|
|
" <td>41</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>6583</td>\n",
|
|
|
|
" <td>12546.5</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2017-01-02 11:23:53+00:00</td>\n",
|
|
|
|
" <td>2019-12-30 10:36:55+00:00</td>\n",
|
|
|
|
" <td>1091 days 23:13:02</td>\n",
|
|
|
|
" <td>6391.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>57</th>\n",
|
|
|
|
" <td>41</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>6514</td>\n",
|
|
|
|
" <td>22423.0</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2014-01-23 16:56:57+00:00</td>\n",
|
|
|
|
" <td>2023-03-06 13:55:23+00:00</td>\n",
|
|
|
|
" <td>3328 days 20:58:26</td>\n",
|
|
|
|
" <td>5321.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>36376</th>\n",
|
|
|
|
" <td>63488</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>5750</td>\n",
|
|
|
|
" <td>63250.0</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2021-06-04 12:20:39+00:00</td>\n",
|
|
|
|
" <td>2022-08-25 13:08:38+00:00</td>\n",
|
|
|
|
" <td>447 days 00:47:59</td>\n",
|
|
|
|
" <td>5750.0</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n",
|
|
|
|
"1 1 4 453242 3248965.5 6 \n",
|
|
|
|
"0 1 2 384226 2686540.5 7 \n",
|
|
|
|
"3 1 6 217356 1435871.5 5 \n",
|
|
|
|
"2 1 5 201750 1459190.0 6 \n",
|
|
|
|
"5032 6733 6 14208 0.0 3 \n",
|
|
|
|
"5029 6733 2 11656 471.0 3 \n",
|
|
|
|
"5030 6733 4 7440 0.0 2 \n",
|
|
|
|
"60 41 6 6583 12546.5 4 \n",
|
|
|
|
"57 41 2 6514 22423.0 6 \n",
|
|
|
|
"36376 63488 4 5750 63250.0 1 \n",
|
|
|
|
"\n",
|
|
|
|
" vente_internet_max purchase_date_min \\\n",
|
|
|
|
"1 1 2013-09-23 14:45:01+00:00 \n",
|
|
|
|
"0 1 2014-12-03 14:55:37+00:00 \n",
|
|
|
|
"3 1 2017-01-01 02:20:08+00:00 \n",
|
|
|
|
"2 1 2013-06-10 10:37:58+00:00 \n",
|
|
|
|
"5032 1 2017-01-11 15:00:54+00:00 \n",
|
|
|
|
"5029 1 2015-09-09 13:48:38+00:00 \n",
|
|
|
|
"5030 1 2021-01-06 10:05:01+00:00 \n",
|
|
|
|
"60 1 2017-01-02 11:23:53+00:00 \n",
|
|
|
|
"57 1 2014-01-23 16:56:57+00:00 \n",
|
|
|
|
"36376 1 2021-06-04 12:20:39+00:00 \n",
|
|
|
|
"\n",
|
|
|
|
" purchase_date_max time_between_purchase nb_tickets_internet \n",
|
|
|
|
"1 2023-11-03 14:11:01+00:00 3692 days 23:26:00 2988.0 \n",
|
|
|
|
"0 2023-11-04 15:12:16+00:00 3258 days 00:16:39 51.0 \n",
|
|
|
|
"3 2019-12-31 02:20:06+00:00 1093 days 23:59:58 5.0 \n",
|
|
|
|
"2 2023-11-08 15:59:45+00:00 3803 days 05:21:47 9.0 \n",
|
|
|
|
"5032 2019-11-27 09:47:06+00:00 1049 days 18:46:12 13497.0 \n",
|
|
|
|
"5029 2022-07-07 07:37:12+00:00 2492 days 17:48:34 9815.0 \n",
|
|
|
|
"5030 2022-09-08 14:39:40+00:00 610 days 04:34:39 7419.0 \n",
|
|
|
|
"60 2019-12-30 10:36:55+00:00 1091 days 23:13:02 6391.0 \n",
|
|
|
|
"57 2023-03-06 13:55:23+00:00 3328 days 20:58:26 5321.0 \n",
|
|
|
|
"36376 2022-08-25 13:08:38+00:00 447 days 00:47:59 5750.0 "
|
|
|
|
]
|
|
|
|
},
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 6,
|
2024-02-10 22:05:09 +01:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Présence d'outliers : les 10 lignes avec le plus grand nb_tickets\n",
|
|
|
|
"tickets_kpi.sort_values(by = ['nb_tickets'], axis = 0, ascending = False).head(10)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 7,
|
2024-02-10 22:05:09 +01:00
|
|
|
"id": "9b2e27f2-703d-465b-a0f9-76e996de617c",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Part du CA par customer\n",
|
|
|
|
"total_amount_share = tickets_kpi.groupby('customer_id')['total_amount'].sum().reset_index()\n",
|
|
|
|
"total_amount_share['total_amount_entreprise'] = total_amount_share['total_amount'].sum()\n",
|
|
|
|
"total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['total_amount_entreprise']\n",
|
|
|
|
"\n",
|
|
|
|
"total_amount_share_index = total_amount_share.set_index('customer_id')\n",
|
|
|
|
"df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 8,
|
2024-02-10 22:05:09 +01:00
|
|
|
"id": "36141803-8865-4210-bd39-0a980301fd0c",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEWCAYAAAAtl/EzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3MElEQVR4nO3dd3hTZfsH8G9WkzbdpXtSRpllgyCIRZFVEARUZBVQprhFRFkKIv4c+IIvLhRRUMSBIoJFlsree7RQWlZp6V5pM+7fH30bSQddSc85yf25rl6Q5OSc+5ycfHPm88iIiMAYYwKTC10AY4wBHEaMMZHgMGKMiQKHEWNMFDiMGGOiwGHEGBMFDiPGmChwGDHGRIHDiDEmCg0WRidPnoSLiwuWL1/eUJNkjElIrcJo9erVkMlk5j+lUonAwEA8/vjjSEhIqPJ9eXl5GDFiBGbOnImZM2fWu+j6+P3337FgwYJKX4uIiEBcXJz58Y0bN7BgwQIcP368wrALFiyATCazTZF1JJPJqpw3R3H27FksWLAAV65caZDpvfXWW9i4caPVxmeN+u+2josa1cKXX35JAOjLL7+kffv20c6dO2nRokXk7OxMfn5+lJmZWen7Ro4cSaNHjyaTyVSbydnEjBkzqKrZPnr0KCUmJpofHzp0yDy/5V29epX27dtnqzLrBADNnz9f6DIEtWHDBgJAO3fubJDpabVaGj9+vNXGZ43677aOi5myLgHWpk0bdO7cGQBw//33w2g0Yv78+di4cSMmTJhQYfjvv/++jlFpPYWFhXBxcbnrMB06dKjx+EJCQhASElLfshhjZWqTXGVbRocOHbJ4fvPmzQSAlixZYvH8oUOHaPDgweTl5UVqtZrat29P69evr3Sc8fHxFBcXR15eXuTi4kKxsbF06dIli2Hj4+NpyJAhFBwcTGq1mpo0aUKTJ0+m9PR0i+Hmz59PAOjIkSM0fPhw8vT0pICAABo/fjwBqPCXlJRERETh4eHmX7mdO3dWOmzZlkfZNO5kNBpp6dKlFBUVRU5OTuTr60tjx46lq1evWgzXu3dvat26NR08eJB69uxJzs7O1LhxY1qyZAkZjcZqP4ecnBx68sknydvbm7RaLfXr148uXLhQ6ZbRxYsXadSoUeTr60tOTk7UokULWrFiRYW633zzTWrevDlpNBry8PCgtm3b0rJly+5aR9kyWrt2Lc2aNYsCAgJIq9VSbGwspaamUm5uLj311FPk4+NDPj4+FBcXR3l5eRbjKCoqotmzZ1NERASpVCoKCgqi6dOnU1ZWlsVw4eHhNGjQINqyZQt16NCBNBoNRUVF0apVq8zDlK1L5f/Ktmxru/6cPn2aHn/8cXJ3dyc/Pz+aMGECZWdnm4erbFq9e/cmIqKCggJ68cUXKSIigtRqNXl5eVGnTp1o3bp1VS7P6uonIlq1ahVFR0ebxzl06FA6e/as+fXq1vEVK1ZQr169yNfXl1xcXKhNmza0dOlSKikpqbC8K9vi6927t3keiYimTJlCarWaDh8+bH7OaDRSnz59yM/Pj27cuFHl/JZnlTBasWIFAaAff/zR/NyOHTvIycmJevXqRevXr6etW7dSXFxchYVbNs7Q0FCaOHEibdmyhT799FPy8/Oj0NBQi5Vy5cqVtGTJEvr1119p9+7d9NVXX1G7du0oKirKYmGWrUzh4eH0yiuv0LZt22jjxo2UmJhII0aMIAC0b98+859OpyMiyw8gJyfHXNvrr79uHrYsWCoLo8mTJxMAevrpp2nr1q308ccfk6+vL4WGhlqs8L179yYfHx9q1qwZffzxx7Rt2zaaPn06AaCvvvrqrp+ByWSimJgYUqvVtHjxYoqPj6f58+dTZGRkhTA6c+aMOVjWrFlD8fHx9OKLL5JcLqcFCxaYh1uyZAkpFAqaP38+bd++nbZu3UrLli2zGKYyZWEUHh5OcXFx5nl2dXWlmJgY6tu3L7300ksUHx9PS5
cuJYVCQTNnzrSYl379+pFSqaS5c+dSfHw8vfvuu6TVaqlDhw7mz6XsswkJCaFWrVrRmjVr6I8//qCRI0cSANq9ezcREaWlpdFbb71FAOijjz4yf2ZpaWl1Wn+ioqJo3rx5tG3bNnr//fdJrVbThAkTzMPt27ePnJ2daeDAgeZpnTlzhohKv6QuLi70/vvv086dO+m3336jt99+m5YvX17l8qyu/rLXRo0aRZs3b6Y1a9ZQZGQkeXh40MWLF4mIql3Hn3/+eVq5ciVt3bqVduzYQR988AE1atTIYr7KlndNwqioqIjat29PkZGR5u/qvHnzSC6XU3x8fJXzWpk6hdH+/ftJr9dTXl4ebd26lQICAui+++4jvV5vHrZFixbUoUMHi+eIiGJjYykwMNC8BVA2zmHDhlkMt2fPHgJAixYtqrQWk8lEer2ekpOTCQD98ssv5tfKVqZ58+ZVeN/d9qfLfwB3O2ZUPozOnTtHAGj69OkWwx04cIAA0Jw5c8zP9e7dmwDQgQMHLIZt1aoV9evXr9LaymzZsoUA0Icffmjx/OLFiyuEUb9+/SgkJIRycnIshn366adJo9GYj/HFxsZS+/bt7zrdypSF0eDBgy2ef+655wgAPfPMMxbPDx06lLy9vc2Pt27dSgDonXfesRhu/fr1BIA+/fRT83Ph4eGk0WgoOTnZ/FxRURF5e3vTlClTzM/V9JhLTdaf8nVNnz6dNBqNxbHPqo4ZtWnThoYOHXrXGipTVf1ZWVnm4LtTSkoKqdVqeuKJJ8zP1fSYkdFoJL1eT2vWrCGFQmFxzLemYURElJCQQO7u7jR06FD6888/SS6X0+uvv179zJZTp1P799xzD1QqFdzc3NC/f394eXnhl19+gVJZeggqMTER58+fx+jRowEABoPB/Ddw4EDcvHkTFy5csBhn2bBlevTogfDwcOzcudP8XFpaGqZOnYrQ0FAolUqoVCqEh4cDAM6dO1ehzuHDh9dl9uqkrM47z8YBQNeuXdGyZUts377d4vmAgAB07drV4rno6GgkJyfXaDrll9cTTzxh8Vin02H79u0YNmwYXFxcKnwGOp0O+/fvN9d44sQJTJ8+HX/88Qdyc3NrNtP/Exsba/G4ZcuWAIBBgwZVeD4zMxP5+fkAgB07dgCouMxGjhwJrVZbYZm1b98eYWFh5scajQbNmzevdpmVqe36M2TIEIvH0dHR0Ol0SEtLq3ZaXbt2xZYtWzB79mzs2rULRUVFNaqxKvv27UNRUVGFZRUaGoo+ffpUWFZVOXbsGIYMGQIfHx8oFAqoVCqMGzcORqMRFy9erFNtTZs2xWeffYaNGzciNjYWvXr1qtPZvDqF0Zo1a3Do0CHs2LEDU6ZMwblz5zBq1Cjz67du3QIAvPTSS1CpVBZ/06dPBwDcvn3bYpwBAQEVphMQEICMjAwAgMlkwkMPPYSffvoJs2bNwvbt23Hw4EHzF6qyDzswMLAus1cnZXVWNs2goCDz62V8fHwqDKdWq6tdaTMyMqBUKiu8v/zyy8jIgMFgwPLlyyt8BgMHDgTw72fw6quv4t1338X+/fsxYMAA+Pj44IEHHsDhw4ermetS3t7eFo+dnJzu+rxOp7OYF19fX4vhZDKZxWdfpq7LDKjb+lN+emq1usphy/vPf/6DV155BRs3bkRMTAy8vb0xdOjQu14Ccze1Xb8qk5KSgl69euH69ev48MMP8ffff+PQoUP46KOPANRsvqoyaNAg+Pv7Q6fT4YUXXoBCoaj1OOp0Nq1ly5bms2kxMTEwGo34/PPP8cMPP2DEiBFo1KgRgNKV/JFHHql0HFFRURaPU1NTKwyTmpqKpk2bAgBOnz6NEydOYPXq1Rg/frx5mMTExCrrbMjrgMpW3Js3b1Y4y3bjxg3zMrHGdAwGAzIyMiy+LOWXn5eXFxQKBcaOHYsZM2ZUOq7GjRsDAJRKJV544QW88MILyM7Oxp9//ok5c+agX79+uHr1arVnIes7L+
np6RaBRERITU1Fly5drDatuqw/9aHVarFw4UIsXLgQt27dMm8lDR48GOfPn6/1+O5cv8qr6fq1ceNGFBQU4KeffjJ
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 300x300 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Customer 1 vs other customers\n",
|
|
|
|
"coupure = 1\n",
|
|
|
|
"\n",
|
|
|
|
"top = df_circulaire[:coupure]\n",
|
|
|
|
"rest = df_circulaire[coupure:]\n",
|
|
|
|
"\n",
|
|
|
|
"# Calculez la somme du reste\n",
|
|
|
|
"rest_sum = rest.sum()\n",
|
|
|
|
"\n",
|
|
|
|
"# Créez une nouvelle série avec les `coupure` plus grandes parts et 'Autre'\n",
|
|
|
|
"new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
|
|
|
|
"\n",
|
|
|
|
"# Créez le graphique circulaire\n",
|
|
|
|
"plt.figure(figsize=(3, 3))\n",
|
|
|
|
"plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
|
|
|
|
"plt.axis('equal') # Assurez-vous que le graphique est un cercle\n",
|
|
|
|
"plt.title('Répartition des montants totaux')\n",
|
|
|
|
"plt.show()\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-02-10 23:19:35 +01:00
|
|
|
"execution_count": 9,
|
2024-02-10 22:05:09 +01:00
|
|
|
"id": "94cf1a25-9ded-48f2-b1b2-75225bdaf49d",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"tickets_kpi_filtered = tickets_kpi[tickets_kpi['customer_id'] != 1]"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "31e4e6f1-efc4-410d-b1d3-bb49950ef58e",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
2024-02-10 19:37:38 +01:00
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.10.13"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 5
|
|
|
|
}
|