diff --git a/1_Descriptive_Statistics.ipynb b/1_Descriptive_Statistics.ipynb index 0eefa74..1009391 100644 --- a/1_Descriptive_Statistics.ipynb +++ b/1_Descriptive_Statistics.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 23, "id": "abfaf341-7b35-4407-9133-d21336c04027", "metadata": {}, "outputs": [], @@ -19,12 +19,13 @@ "import numpy as np\n", "import os\n", "import s3fs\n", - "import re" + "import re\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "7fb72fa3-7940-496f-ac78-c2837f65eefa", "metadata": {}, "outputs": [], @@ -43,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "id": "9376af51-4320-44b6-8f30-1e1234371556", "metadata": {}, "outputs": [], @@ -59,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "id": "1855dcca-cfce-4c54-90ae-55d9a1ab5d45", "metadata": {}, "outputs": [ @@ -84,7 +85,6 @@ " \n", " \n", " \n", - " Unnamed: 0\n", " customer_id\n", " event_type_id\n", " nb_tickets\n", @@ -100,7 +100,6 @@ " \n", " \n", " 0\n", - " 0\n", " 1\n", " 2\n", " 384226\n", @@ -115,7 +114,6 @@ " \n", " 1\n", " 1\n", - " 1\n", " 4\n", " 453242\n", " 3248965.5\n", @@ -128,7 +126,6 @@ " \n", " \n", " 2\n", - " 2\n", " 1\n", " 5\n", " 201750\n", @@ -142,7 +139,6 @@ " \n", " \n", " 3\n", - " 3\n", " 1\n", " 6\n", " 217356\n", @@ -156,7 +152,6 @@ " \n", " \n", " 4\n", - " 4\n", " 2\n", " 2\n", " 143\n", @@ -173,26 +168,271 @@ "" ], "text/plain": [ - " Unnamed: 0 customer_id event_type_id nb_tickets total_amount \\\n", - "0 0 1 2 384226 2686540.5 \n", - "1 1 1 4 453242 3248965.5 \n", - "2 2 1 5 201750 1459190.0 \n", - "3 3 1 6 217356 1435871.5 \n", - "4 4 2 2 143 0.0 \n", + " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n", + "0 1 2 384226 2686540.5 7 \n", + "1 1 4 453242 3248965.5 6 \n", + "2 1 5 201750 1459190.0 6 \n", + "3 1 6 217356 1435871.5 5 \n", + "4 2 2 
143 0.0 1 \n", "\n", - " nb_suppliers vente_internet_max purchase_date_min \\\n", - "0 7 1 2014-12-03 14:55:37+00:00 \n", - "1 6 1 2013-09-23 14:45:01+00:00 \n", - "2 6 1 2013-06-10 10:37:58+00:00 \n", - "3 5 1 2017-01-01 02:20:08+00:00 \n", - "4 1 0 2018-04-07 12:55:07+00:00 \n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n", + "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n", + "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n", + "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n", + "4 0 2018-04-07 12:55:07+00:00 2020-03-08 12:06:43+00:00 \n", "\n", - " purchase_date_max time_between_purchase nb_tickets_internet \n", - "0 2023-11-04 15:12:16+00:00 3258 days 00:16:39 51.0 \n", - "1 2023-11-03 14:11:01+00:00 3692 days 23:26:00 2988.0 \n", - "2 2023-11-08 15:59:45+00:00 3803 days 05:21:47 9.0 \n", - "3 2019-12-31 02:20:06+00:00 1093 days 23:59:58 5.0 \n", - "4 2020-03-08 12:06:43+00:00 700 days 23:11:36 0.0 " + " time_between_purchase nb_tickets_internet \n", + "0 3258 days 00:16:39 51.0 \n", + "1 3692 days 23:26:00 2988.0 \n", + "2 3803 days 05:21:47 9.0 \n", + "3 1093 days 23:59:58 5.0 \n", + "4 700 days 23:11:36 0.0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tickets_kpi.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0e5d3b2e-1a75-4d46-80e6-c306e9f8de84", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['customer_id', 'event_type_id', 'nb_tickets', 'total_amount',\n", + " 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n", + " 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet'],\n", + " dtype='object')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tickets_kpi.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + 
"id": "7667e8eb-9a1e-4216-96f4-bf987c6e30b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idevent_type_idnb_ticketstotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internet
1144532423248965.5612013-09-23 14:45:01+00:002023-11-03 14:11:01+00:003692 days 23:26:002988.0
0123842262686540.5712014-12-03 14:55:37+00:002023-11-04 15:12:16+00:003258 days 00:16:3951.0
3162173561435871.5512017-01-01 02:20:08+00:002019-12-31 02:20:06+00:001093 days 23:59:585.0
2152017501459190.0612013-06-10 10:37:58+00:002023-11-08 15:59:45+00:003803 days 05:21:479.0
503267336142080.0312017-01-11 15:00:54+00:002019-11-27 09:47:06+00:001049 days 18:46:1213497.0
50296733211656471.0312015-09-09 13:48:38+00:002022-07-07 07:37:12+00:002492 days 17:48:349815.0
50306733474400.0212021-01-06 10:05:01+00:002022-09-08 14:39:40+00:00610 days 04:34:397419.0
60416658312546.5412017-01-02 11:23:53+00:002019-12-30 10:36:55+00:001091 days 23:13:026391.0
57412651422423.0612014-01-23 16:56:57+00:002023-03-06 13:55:23+00:003328 days 20:58:265321.0
36376634884575063250.0112021-06-04 12:20:39+00:002022-08-25 13:08:38+00:00447 days 00:47:595750.0
\n", + "
" + ], + "text/plain": [ + " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n", + "1 1 4 453242 3248965.5 6 \n", + "0 1 2 384226 2686540.5 7 \n", + "3 1 6 217356 1435871.5 5 \n", + "2 1 5 201750 1459190.0 6 \n", + "5032 6733 6 14208 0.0 3 \n", + "5029 6733 2 11656 471.0 3 \n", + "5030 6733 4 7440 0.0 2 \n", + "60 41 6 6583 12546.5 4 \n", + "57 41 2 6514 22423.0 6 \n", + "36376 63488 4 5750 63250.0 1 \n", + "\n", + " vente_internet_max purchase_date_min \\\n", + "1 1 2013-09-23 14:45:01+00:00 \n", + "0 1 2014-12-03 14:55:37+00:00 \n", + "3 1 2017-01-01 02:20:08+00:00 \n", + "2 1 2013-06-10 10:37:58+00:00 \n", + "5032 1 2017-01-11 15:00:54+00:00 \n", + "5029 1 2015-09-09 13:48:38+00:00 \n", + "5030 1 2021-01-06 10:05:01+00:00 \n", + "60 1 2017-01-02 11:23:53+00:00 \n", + "57 1 2014-01-23 16:56:57+00:00 \n", + "36376 1 2021-06-04 12:20:39+00:00 \n", + "\n", + " purchase_date_max time_between_purchase nb_tickets_internet \n", + "1 2023-11-03 14:11:01+00:00 3692 days 23:26:00 2988.0 \n", + "0 2023-11-04 15:12:16+00:00 3258 days 00:16:39 51.0 \n", + "3 2019-12-31 02:20:06+00:00 1093 days 23:59:58 5.0 \n", + "2 2023-11-08 15:59:45+00:00 3803 days 05:21:47 9.0 \n", + "5032 2019-11-27 09:47:06+00:00 1049 days 18:46:12 13497.0 \n", + "5029 2022-07-07 07:37:12+00:00 2492 days 17:48:34 9815.0 \n", + "5030 2022-09-08 14:39:40+00:00 610 days 04:34:39 7419.0 \n", + "60 2019-12-30 10:36:55+00:00 1091 days 23:13:02 6391.0 \n", + "57 2023-03-06 13:55:23+00:00 3328 days 20:58:26 5321.0 \n", + "36376 2022-08-25 13:08:38+00:00 447 days 00:47:59 5750.0 " ] }, "execution_count": 12, @@ -201,8 +441,81 @@ } ], "source": [ - "tickets_kpi.head()" + "# Présence d'outlier\n", + "tickets_kpi.sort_values(by = ['nb_tickets'], axis = 0, ascending = False).head(10)" ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "9b2e27f2-703d-465b-a0f9-76e996de617c", + "metadata": {}, + "outputs": [], + "source": [ + "# Part du CA par customer\n", + "total_amount_share = 
tickets_kpi.groupby('customer_id')['total_amount'].sum().reset_index()\n", + "total_amount_share['total_amount_entreprise'] = total_amount_share['total_amount'].sum()\n", + "total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['total_amount_entreprise']\n", + "\n", + "total_amount_share_index = total_amount_share.set_index('customer_id')\n", + "df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "36141803-8865-4210-bd39-0a980301fd0c", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Customer 1 vs other customers\n", + "coupure = 1\n", + "\n", + "top = df_circulaire[:coupure]\n", + "rest = df_circulaire[coupure:]\n", + "\n", + "# Calculez la somme du reste\n", + "rest_sum = rest.sum()\n", + "\n", + "# Créez une nouvelle série avec les `coupure` plus grandes parts et 'Autre'\n", + "new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n", + "\n", + "# Créez le graphique circulaire\n", + "plt.figure(figsize=(3, 3))\n", + "plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n", + "plt.axis('equal') # Assurez-vous que le graphique est un cercle\n", + "plt.title('Répartition des montants totaux')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "94cf1a25-9ded-48f2-b1b2-75225bdaf49d", + "metadata": {}, + "outputs": [], + "source": [ + "tickets_kpi_filtered = tickets_kpi[tickets_kpi['customer_id'] != 1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31e4e6f1-efc4-410d-b1d3-bb49950ef58e", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {