From dbb90fb3642251ae51c52317c65af666508b68a8 Mon Sep 17 00:00:00 2001 From: frodrigue-ensae Date: Tue, 5 Mar 2024 01:43:40 +0000 Subject: [PATCH] stat --- Spectacle/Stat_desc.ipynb | 205 ++++++++++++-------------------------- 1 file changed, 62 insertions(+), 143 deletions(-) diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb index 2fb2c0d..c8e2dce 100644 --- a/Spectacle/Stat_desc.ipynb +++ b/Spectacle/Stat_desc.ipynb @@ -3855,163 +3855,74 @@ }, { "cell_type": "code", - "execution_count": 229, - "id": "4c02ae98-9184-41d9-a9ea-ed9eaf719bd3", + "execution_count": 22, + "id": "d64979ba-fccf-45f2-8a15-40ef1b49c74f", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idnb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetnumber_compagny
0194828829872.0212643.092500718.1493981924.9431028.010
1194843262.0101745.0217361743.0450351.9767010.010
219485131211878.0212649.04474585.2408452563.80390084.010
31948610496.0101944.0776041742.794225201.2833800.010
4194872133.0101742.8777661742.8777660.0000000.010
\n", - "
" - ], - "text/plain": [ - " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n", - "0 19482 88 29 872.0 2 \n", - "1 19484 3 2 62.0 1 \n", - "2 19485 131 21 1878.0 2 \n", - "3 19486 10 4 96.0 1 \n", - "4 19487 2 1 33.0 1 \n", - "\n", - " vente_internet_max purchase_date_min purchase_date_max \\\n", - "0 1 2643.092500 718.149398 \n", - "1 0 1745.021736 1743.045035 \n", - "2 1 2649.044745 85.240845 \n", - "3 0 1944.077604 1742.794225 \n", - "4 0 1742.877766 1742.877766 \n", - "\n", - " time_between_purchase nb_tickets_internet number_compagny \n", - "0 1924.943102 8.0 10 \n", - "1 1.976701 0.0 10 \n", - "2 2563.803900 84.0 10 \n", - "3 201.283380 0.0 10 \n", - "4 0.000000 0.0 10 " - ] - }, - "execution_count": 229, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_430/3239820253.py:6: DtypeWarning: Columns (39) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " dataset_train = pd.read_csv(file_in, sep=\",\")\n" + ] } ], "source": [ - "products_purchased_reduced_spectacle.head()" + "#base d'entrainement\n", + "\n", + "#FILE_PATH_S3='projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'\n", + "\n", + "#with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " #dataset_train = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "635d60cd-2dbc-49da-b0f4-94e16667882f", + "metadata": {}, + "outputs": [], + "source": [ + "#Creation de la variable dependante fictive: 1 si l'individu a effectué un achat au cours de la periode de train et 0 sinon\n", + "\n", + "#dataset_train_modif=dataset_train\n", + "\n", + "#dataset_train_modif[\"y_purchase_fictive\"]=np.random.randint(2, size=dataset_train_modif.shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "ea63e5d6-70f9-4685-8b08-673a47108954", + "metadata": {}, + "outputs": [], + "source": [ + "#dataset_train_modif[\"y_purchase_fictive\"].value_counts(normalize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "98f7645f-ffe6-4d7b-8032-15e65f36af87", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#dataset_train_modif[\"y_purchase_fictive\"]=dataset_train_modif[\"y_purchase_fictive\"].replace([0,1],[\"Purchase_train\",\"no_purchase_train\"])" ] }, { "cell_type": "code", "execution_count": null, - "id": "e1a95b8f-6539-48bd-b09d-6f8f63d25fb2", + "id": "eb6355e0-3f8c-47d9-a5ee-d349040dcf51", "metadata": {}, "outputs": [], "source": [ - "#nombre de ticket par compagnie\n", + "#repartion Chiffre d'affaire selon le numero de la compagnie\n", "\n", - "company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n", - "company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n", - "company_campaigns_stats" + "sns.boxplot(data=chiffre_affaire_categ, y=\"chiffre_affaire\",x=\"categ\",showfliers=False,showmeans=True)\n", + "plt.title(\"Boite à moustache du chiffre d'affaire selon les categories de livre\")" ] }, { @@ -4240,7 +4151,15 @@ { "cell_type": "code", "execution_count": null, - "id": "0a180f0a-c6de-4e66-9ae8-fdbfdf8837c9", + "id": "93a8ae1f-6fbd-4210-a857-728ae472d1c5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3", "metadata": {}, "outputs": [], "source": []