From eaf1884bb6e87756cbe10da032fe237f41db3260 Mon Sep 17 00:00:00 2001 From: frodrigue-ensae Date: Tue, 5 Mar 2024 13:36:03 +0000 Subject: [PATCH] anova --- Spectacle/Stat_desc.ipynb | 352 +------------------------------------- 1 file changed, 2 insertions(+), 350 deletions(-) diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb index 52f066e..b048fb1 100644 --- a/Spectacle/Stat_desc.ipynb +++ b/Spectacle/Stat_desc.ipynb @@ -4168,355 +4168,7 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "58cbe8a5-3899-4aa3-91ab-48bed9124fbd", - "metadata": {}, - "outputs": [], - "source": [ - "#creation des base servant au test d'anova\n", - "\n", - "time_between_purchase_10 = products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle['number_compagny'] == 10]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "f563009c-e9a8-4e09-a345-87c49cbd4485", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 581.065880\n", - "1 0.000023\n", - "2 1032.951030\n", - "3 2502.966319\n", - "4 1728.097037\n", - " ... \n", - "35217 0.000000\n", - "35218 0.000000\n", - "35219 0.000000\n", - "35220 0.000000\n", - "35221 0.000000\n", - "Name: time_between_purchase, Length: 35222, dtype: float64\n", - "0 753.75116\n", - "1 0.00000\n", - "2 0.00000\n", - "3 0.00000\n", - "4 0.00000\n", - " ... \n", - "213494 0.00000\n", - "213495 0.00000\n", - "213496 0.00000\n", - "213497 0.00000\n", - "213498 0.00000\n", - "Name: time_between_purchase, Length: 213499, dtype: float64\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "# Supposons que vous avez un DataFrame appelé products_purchased_reduced_spectacle\n", - "# qui contient les données de vos produits achetés avec la colonne 'number_compagny'\n", - "\n", - "# Créez une liste des identifiants d'entreprise pour lesquels vous voulez effectuer la sélection\n", - "entreprises = [11, 12, 13, 14]\n", - "\n", - "# Parcourez chaque entreprise et effectuez la sélection\n", - "for entreprise in entreprises:\n", - " nom_variable = f\"time_between_purchase_{entreprise}\"\n", - " globals()[nom_variable] = products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle['number_compagny'] == entreprise,\"time_between_purchase\"]\n", - "\n", - "# Maintenant, les résultats pour chaque entreprise sont stockés dans des variables distinctes\n", - "# Vous pouvez y accéder directement\n", - "print(time_between_purchase_11)\n", - "print(time_between_purchase_12)\n", - "# et ainsi de suite pour les autres entreprises\n" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "58f49748-e55f-4d1b-b58b-102d02a9e0eb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "number_compagny 4.108441e+09 1.0 23548.336165 0.0\n", - "Residual 1.334471e+11 764878.0 NaN NaN la p-value associé à la stat de fisher est inferieure à 5% donc il y a un lien entre les entreprise et le temps écoulés entre le premier et le dernier achat\n" - ] - } - ], - "source": [ - "#test anova entre les entreprise de spectacle et time_between_purchase\n", - "import statsmodels.api as sm\n", - "from statsmodels.formula.api import ols\n", - "model = ols('time_between_purchase ~ number_compagny', data=products_purchased_reduced_spectacle).fit()\n", - "anova_table = sm.stats.anova_lm(model, typ=2)\n", - "anova_table\n", - "print(anova_table,\"la p-value associé à la stat de fisher est inferieure à 5% donc il y a un lien entre les entreprise et le temps écoulés entre le premier et le dernier achat\" )\n" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "aa000e3e-a44d-4cb4-b44b-9794f4b711ca", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idnb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetnumber_compagny
011211271.60201778.4076391197.341759581.0658800.011
122246.00101407.3659371407.3659140.0000230.011
2398261.25201461.382106428.4310761032.9510300.011
345048646.00102846.403148343.4368292502.9663190.011
45595657588442.20201732.3382414.2412041728.0970370.011
....................................
352175201442120.00100.2275120.2275120.0000000.011
352185201482192.00100.1113190.1113190.0000000.011
352195201503120.00100.0886690.0886690.0000000.011
3522052015121126.00100.0449880.0449880.0000000.011
3522152015221126.00100.0000000.0000000.0000000.011
\n", - "

35222 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n", - "0 1 12 11 271.60 2 \n", - "1 2 2 2 46.00 1 \n", - "2 3 9 8 261.25 2 \n", - "3 4 50 48 646.00 1 \n", - "4 5 5956 5758 8442.20 2 \n", - "... ... ... ... ... ... \n", - "35217 520144 2 1 20.00 1 \n", - "35218 520148 2 1 92.00 1 \n", - "35219 520150 3 1 20.00 1 \n", - "35220 520151 2 1 126.00 1 \n", - "35221 520152 2 1 126.00 1 \n", - "\n", - " vente_internet_max purchase_date_min purchase_date_max \\\n", - "0 0 1778.407639 1197.341759 \n", - "1 0 1407.365937 1407.365914 \n", - "2 0 1461.382106 428.431076 \n", - "3 0 2846.403148 343.436829 \n", - "4 0 1732.338241 4.241204 \n", - "... ... ... ... \n", - "35217 0 0.227512 0.227512 \n", - "35218 0 0.111319 0.111319 \n", - "35219 0 0.088669 0.088669 \n", - "35220 0 0.044988 0.044988 \n", - "35221 0 0.000000 0.000000 \n", - "\n", - " time_between_purchase nb_tickets_internet number_compagny \n", - "0 581.065880 0.0 11 \n", - "1 0.000023 0.0 11 \n", - "2 1032.951030 0.0 11 \n", - "3 2502.966319 0.0 11 \n", - "4 1728.097037 0.0 11 \n", - "... ... ... ... \n", - "35217 0.000000 0.0 11 \n", - "35218 0.000000 0.0 11 \n", - "35219 0.000000 0.0 11 \n", - "35220 0.000000 0.0 11 \n", - "35221 0.000000 0.0 11 \n", - "\n", - "[35222 rows x 11 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "time_between_purchase_11" - ] - }, - { - "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74", "metadata": {}, "outputs": [ @@ -4528,7 +4180,7 @@ "Valeur de p : 0.0\n", "Nombre de degrés de liberté entre les groupes : 4\n", "Nombre de degrés de liberté à l'intérieur des groupes : 764875\n", - "Il y a des différences significatives entre au moins une paire de catégories.\n" + "Il y a des différences significatives entre au moins une des entrepries .\n" ] } ],