This commit is contained in:
Fanta RODRIGUE 2024-03-05 13:36:03 +00:00
parent 29ac99df14
commit eaf1884bb6

View File

@ -4168,355 +4168,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"id": "58cbe8a5-3899-4aa3-91ab-48bed9124fbd",
"metadata": {},
"outputs": [],
"source": [
"# Rows of company 10 (all columns kept), prepared as one group for the ANOVA test\n",
"\n",
"time_between_purchase_10 = products_purchased_reduced_spectacle[\n",
"    products_purchased_reduced_spectacle['number_compagny'].eq(10)\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "f563009c-e9a8-4e09-a345-87c49cbd4485",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 581.065880\n",
"1 0.000023\n",
"2 1032.951030\n",
"3 2502.966319\n",
"4 1728.097037\n",
" ... \n",
"35217 0.000000\n",
"35218 0.000000\n",
"35219 0.000000\n",
"35220 0.000000\n",
"35221 0.000000\n",
"Name: time_between_purchase, Length: 35222, dtype: float64\n",
"0 753.75116\n",
"1 0.00000\n",
"2 0.00000\n",
"3 0.00000\n",
"4 0.00000\n",
" ... \n",
"213494 0.00000\n",
"213495 0.00000\n",
"213496 0.00000\n",
"213497 0.00000\n",
"213498 0.00000\n",
"Name: time_between_purchase, Length: 213499, dtype: float64\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Reads products_purchased_reduced_spectacle, using its 'number_compagny'\n",
"# and 'time_between_purchase' columns.\n",
"\n",
"# Company identifiers to extract.\n",
"entreprises = [11, 12, 13, 14]\n",
"\n",
"# For each company, bind its 'time_between_purchase' values to a notebook-level\n",
"# variable named time_between_purchase_<id>.\n",
"for entreprise in entreprises:\n",
"    nom_variable = f\"time_between_purchase_{entreprise}\"\n",
"    globals()[nom_variable] = products_purchased_reduced_spectacle.loc[\n",
"        products_purchased_reduced_spectacle['number_compagny'].eq(entreprise),\n",
"        \"time_between_purchase\",\n",
"    ]\n",
"\n",
"# The per-company selections are now reachable by name; show the first two.\n",
"print(time_between_purchase_11)\n",
"print(time_between_purchase_12)\n",
"# time_between_purchase_13 and time_between_purchase_14 are available the same way\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58f49748-e55f-4d1b-b58b-102d02a9e0eb",
"metadata": {},
"outputs": [],
"source": [
"# ANOVA of time_between_purchase across the spectacle companies.\n",
"# number_compagny holds company identifiers, so it is wrapped in C(...) to be\n",
"# modelled as a categorical factor (one level per company). Without C(...) the\n",
"# formula treats the identifier as a numeric regressor with a single degree of\n",
"# freedom, which tests a linear trend in the ID instead of group differences.\n",
"import statsmodels.api as sm\n",
"from statsmodels.formula.api import ols\n",
"model = ols('time_between_purchase ~ C(number_compagny)', data=products_purchased_reduced_spectacle).fit()\n",
"# Type-II ANOVA table for the fitted model.\n",
"anova_table = sm.stats.anova_lm(model, typ=2)\n",
"print(anova_table,\"la p-value associé à la stat de fisher est inferieure à 5% donc il y a un lien entre les entreprise et le temps écoulés entre le premier et le dernier achat\" )\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "aa000e3e-a44d-4cb4-b44b-9794f4b711ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>12</td>\n",
" <td>11</td>\n",
" <td>271.60</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1778.407639</td>\n",
" <td>1197.341759</td>\n",
" <td>581.065880</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>46.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1407.365937</td>\n",
" <td>1407.365914</td>\n",
" <td>0.000023</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>8</td>\n",
" <td>261.25</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1461.382106</td>\n",
" <td>428.431076</td>\n",
" <td>1032.951030</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>50</td>\n",
" <td>48</td>\n",
" <td>646.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2846.403148</td>\n",
" <td>343.436829</td>\n",
" <td>2502.966319</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>5956</td>\n",
" <td>5758</td>\n",
" <td>8442.20</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1732.338241</td>\n",
" <td>4.241204</td>\n",
" <td>1728.097037</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35217</th>\n",
" <td>520144</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>20.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.227512</td>\n",
" <td>0.227512</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35218</th>\n",
" <td>520148</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>92.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.111319</td>\n",
" <td>0.111319</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35219</th>\n",
" <td>520150</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>20.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.088669</td>\n",
" <td>0.088669</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35220</th>\n",
" <td>520151</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>126.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.044988</td>\n",
" <td>0.044988</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35221</th>\n",
" <td>520152</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>126.00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>35222 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 1 12 11 271.60 2 \n",
"1 2 2 2 46.00 1 \n",
"2 3 9 8 261.25 2 \n",
"3 4 50 48 646.00 1 \n",
"4 5 5956 5758 8442.20 2 \n",
"... ... ... ... ... ... \n",
"35217 520144 2 1 20.00 1 \n",
"35218 520148 2 1 92.00 1 \n",
"35219 520150 3 1 20.00 1 \n",
"35220 520151 2 1 126.00 1 \n",
"35221 520152 2 1 126.00 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0 1778.407639 1197.341759 \n",
"1 0 1407.365937 1407.365914 \n",
"2 0 1461.382106 428.431076 \n",
"3 0 2846.403148 343.436829 \n",
"4 0 1732.338241 4.241204 \n",
"... ... ... ... \n",
"35217 0 0.227512 0.227512 \n",
"35218 0 0.111319 0.111319 \n",
"35219 0 0.088669 0.088669 \n",
"35220 0 0.044988 0.044988 \n",
"35221 0 0.000000 0.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet number_compagny \n",
"0 581.065880 0.0 11 \n",
"1 0.000023 0.0 11 \n",
"2 1032.951030 0.0 11 \n",
"3 2502.966319 0.0 11 \n",
"4 1728.097037 0.0 11 \n",
"... ... ... ... \n",
"35217 0.000000 0.0 11 \n",
"35218 0.000000 0.0 11 \n",
"35219 0.000000 0.0 11 \n",
"35220 0.000000 0.0 11 \n",
"35221 0.000000 0.0 11 \n",
"\n",
"[35222 rows x 11 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"time_between_purchase_11"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 29,
"id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74",
"metadata": {},
"outputs": [
@ -4528,7 +4180,7 @@
"Valeur de p : 0.0\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
"Il y a des différences significatives entre au moins une paire de catégories.\n"
"Il y a des différences significatives entre au moins une des entrepries .\n"
]
}
],