completed segment mp analysis sport

2024-03-26 11:20:03 +00:00 · 2024-03-26 11:20:03 +00:00 · 2165c7c16e
commit 2165c7c16e
parent dbd87dadd9
3 changed files with 1893 additions and 152 deletions
--- a/Sport/Modelization/CA_segment_sport.ipynb
+++ b/Sport/Modelization/CA_segment_sport.ipynb
@ -2049,7 +2049,7 @@
   "source": [
    "# comparison between score and adjusted score - export csv associated\n",
    "\n",
-    "file_name = \"table_adjusted_score\"\n",
+    "file_name = \"table_adjusted_score_\"\n",
    "FILE_PATH_OUT_S3 = PATH + file_name +  type_of_activity + \".csv\"\n",
    "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
    "    X_test_table_adjusted_scores.to_csv(file_out, index = False)"
@ -2057,12 +2057,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 77,
   "id": "a974589f-7952-4db2-bebf-7b69c6b09372",
   "metadata": {},
   "outputs": [],
   "source": [
-    "def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
+    "def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
    "    \n",
    "    duration_ratio = duration_ref/duration_projection\n",
    "\n",
@ -2074,45 +2074,17 @@
    "    df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
    "    df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n",
    "\n",
    "    df_output.loc[:,\"pace_purchase\"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)\n",
    "    \n",
    "    return df_output\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 79,
   "id": "dd8a52e1-d06e-4790-8687-8e58e3e6b84e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_521/3689439025.py:7: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_output.loc[:,\"nb_tickets_projected\"] = df_output.loc[:,nb_tickets] / duration_ratio\n",
      "/tmp/ipykernel_521/3689439025.py:8: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_output.loc[:,\"total_amount_projected\"] = df_output.loc[:,total_amount] / duration_ratio\n",
      "/tmp/ipykernel_521/3689439025.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
      "/tmp/ipykernel_521/3689439025.py:11: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n"
     ]
    },
    {
     "data": {
      "text/html": [
@ -2145,7 +2117,6 @@
       "      <th>nb_tickets_internet</th>\n",
       "      <th>fidelity</th>\n",
       "      <th>...</th>\n",
       "      <th>nb_campaigns_opened</th>\n",
       "      <th>has_purchased</th>\n",
       "      <th>has_purchased_estim</th>\n",
       "      <th>score</th>\n",
@ -2155,6 +2126,7 @@
       "      <th>total_amount_projected</th>\n",
       "      <th>nb_tickets_expected</th>\n",
       "      <th>total_amount_expected</th>\n",
       "      <th>pace_purchase</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
@ -2172,15 +2144,15 @@
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.657671</td>\n",
       "      <td>3</td>\n",
       "      <td>0.240397</td>\n",
-       "      <td>2.666667</td>\n",
+       "      <td>2.823529</td>\n",
-       "      <td>66.666667</td>\n",
+       "      <td>70.588235</td>\n",
-       "      <td>0.641059</td>\n",
+       "      <td>0.678768</td>\n",
-       "      <td>16.026472</td>\n",
+       "      <td>16.969205</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
@ -2195,16 +2167,16 @@
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.266538</td>\n",
       "      <td>2</td>\n",
       "      <td>0.056482</td>\n",
-       "      <td>0.666667</td>\n",
+       "      <td>0.705882</td>\n",
-       "      <td>36.666667</td>\n",
+       "      <td>38.823529</td>\n",
-       "      <td>0.037655</td>\n",
+       "      <td>0.039870</td>\n",
-       "      <td>2.071006</td>\n",
+       "      <td>2.192830</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
@ -2221,14 +2193,14 @@
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.214668</td>\n",
       "      <td>1</td>\n",
       "      <td>0.043089</td>\n",
-       "      <td>11.333333</td>\n",
+       "      <td>12.000000</td>\n",
-       "      <td>53.333333</td>\n",
+       "      <td>56.470588</td>\n",
-       "      <td>0.488340</td>\n",
+       "      <td>0.517065</td>\n",
-       "      <td>2.298068</td>\n",
+       "      <td>2.433249</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
@ -2244,15 +2216,15 @@
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.657770</td>\n",
       "      <td>3</td>\n",
       "      <td>0.240478</td>\n",
-       "      <td>2.666667</td>\n",
+       "      <td>2.823529</td>\n",
-       "      <td>80.000000</td>\n",
+       "      <td>84.705882</td>\n",
-       "      <td>0.641273</td>\n",
+       "      <td>0.678995</td>\n",
-       "      <td>19.238202</td>\n",
+       "      <td>20.369861</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
@ -2267,16 +2239,16 @@
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.894173</td>\n",
       "      <td>4</td>\n",
       "      <td>0.581920</td>\n",
-       "      <td>22.666667</td>\n",
+       "      <td>24.000000</td>\n",
-       "      <td>277.333333</td>\n",
+       "      <td>293.647059</td>\n",
-       "      <td>13.190183</td>\n",
+       "      <td>13.966076</td>\n",
-       "      <td>161.385771</td>\n",
+       "      <td>170.879052</td>\n",
       "      <td>8.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
@ -2315,16 +2287,16 @@
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.623551</td>\n",
       "      <td>3</td>\n",
       "      <td>0.214369</td>\n",
-       "      <td>0.666667</td>\n",
+       "      <td>0.705882</td>\n",
-       "      <td>44.873333</td>\n",
+       "      <td>47.512941</td>\n",
-       "      <td>0.142913</td>\n",
+       "      <td>0.151320</td>\n",
-       "      <td>9.619467</td>\n",
+       "      <td>10.185318</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96092</th>\n",
@ -2339,16 +2311,16 @@
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.682521</td>\n",
       "      <td>3</td>\n",
       "      <td>0.261526</td>\n",
-       "      <td>0.666667</td>\n",
+       "      <td>0.705882</td>\n",
-       "      <td>40.940000</td>\n",
+       "      <td>43.348235</td>\n",
-       "      <td>0.174351</td>\n",
+       "      <td>0.184607</td>\n",
-       "      <td>10.706885</td>\n",
+       "      <td>11.336701</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96093</th>\n",
@ -2363,7 +2335,6 @@
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.117192</td>\n",
@ -2373,6 +2344,7 @@
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96094</th>\n",
@ -2387,16 +2359,16 @@
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.625185</td>\n",
       "      <td>3</td>\n",
       "      <td>0.215545</td>\n",
-       "      <td>0.666667</td>\n",
+       "      <td>0.705882</td>\n",
-       "      <td>52.953333</td>\n",
+       "      <td>56.068235</td>\n",
-       "      <td>0.143697</td>\n",
+       "      <td>0.152150</td>\n",
-       "      <td>11.413840</td>\n",
+       "      <td>12.085242</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96095</th>\n",
@ -2411,7 +2383,6 @@
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.319585</td>\n",
@ -2421,10 +2392,11 @@
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
-       "<p>96096 rows × 26 columns</p>\n",
+       "<p>96096 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
@ -2467,68 +2439,95 @@
       "96094               0.000000                  1.0         1  ...   \n",
       "96095              -1.000000                  0.0         2  ...   \n",
       "\n",
-       "       nb_campaigns_opened  has_purchased  has_purchased_estim     score  \\\n",
+       "       has_purchased  has_purchased_estim     score  quartile  score_adjusted  \\\n",
-       "0                      0.0            0.0                  1.0  0.657671   \n",
+       "0                0.0                  1.0  0.657671         3        0.240397   \n",
-       "1                      0.0            1.0                  0.0  0.266538   \n",
+       "1                1.0                  0.0  0.266538         2        0.056482   \n",
-       "2                      0.0            0.0                  0.0  0.214668   \n",
+       "2                0.0                  0.0  0.214668         1        0.043089   \n",
-       "3                      0.0            0.0                  1.0  0.657770   \n",
+       "3                0.0                  1.0  0.657770         3        0.240478   \n",
-       "4                      0.0            1.0                  1.0  0.894173   \n",
+       "4                1.0                  1.0  0.894173         4        0.581920   \n",
-       "...                    ...            ...                  ...       ...   \n",
+       "...              ...                  ...       ...       ...             ...   \n",
-       "96091                  5.0            1.0                  1.0  0.623551   \n",
+       "96091            1.0                  1.0  0.623551         3        0.214369   \n",
-       "96092                  9.0            0.0                  1.0  0.682521   \n",
+       "96092            0.0                  1.0  0.682521         3        0.261526   \n",
-       "96093                  3.0            0.0                  0.0  0.117192   \n",
+       "96093            0.0                  0.0  0.117192         1        0.021400   \n",
-       "96094                  4.0            0.0                  1.0  0.625185   \n",
+       "96094            0.0                  1.0  0.625185         3        0.215545   \n",
-       "96095                  4.0            0.0                  0.0  0.319585   \n",
+       "96095            0.0                  0.0  0.319585         2        0.071817   \n",
       "\n",
-       "       quartile  score_adjusted  nb_tickets_projected  total_amount_projected  \\\n",
+       "       nb_tickets_projected  total_amount_projected  nb_tickets_expected  \\\n",
-       "0             3        0.240397              2.666667               66.666667   \n",
+       "0                  2.823529               70.588235             0.678768   \n",
-       "1             2        0.056482              0.666667               36.666667   \n",
+       "1                  0.705882               38.823529             0.039870   \n",
-       "2             1        0.043089             11.333333               53.333333   \n",
+       "2                 12.000000               56.470588             0.517065   \n",
-       "3             3        0.240478              2.666667               80.000000   \n",
+       "3                  2.823529               84.705882             0.678995   \n",
-       "4             4        0.581920             22.666667              277.333333   \n",
+       "4                 24.000000              293.647059            13.966076   \n",
-       "...         ...             ...                   ...                     ...   \n",
+       "...                     ...                     ...                  ...   \n",
-       "96091         3        0.214369              0.666667               44.873333   \n",
+       "96091              0.705882               47.512941             0.151320   \n",
-       "96092         3        0.261526              0.666667               40.940000   \n",
+       "96092              0.705882               43.348235             0.184607   \n",
-       "96093         1        0.021400              0.000000                0.000000   \n",
+       "96093              0.000000                0.000000             0.000000   \n",
-       "96094         3        0.215545              0.666667               52.953333   \n",
+       "96094              0.705882               56.068235             0.152150   \n",
-       "96095         2        0.071817              0.000000                0.000000   \n",
+       "96095              0.000000                0.000000             0.000000   \n",
       "\n",
-       "       nb_tickets_expected  total_amount_expected  \n",
+       "       total_amount_expected  pace_purchase  \n",
-       "0                 0.641059              16.026472  \n",
+       "0                  16.969205           17.0  \n",
-       "1                 0.037655               2.071006  \n",
+       "1                   2.192830           17.0  \n",
-       "2                 0.488340               2.298068  \n",
+       "2                   2.433249           17.0  \n",
-       "3                 0.641273              19.238202  \n",
+       "3                  20.369861           17.0  \n",
-       "4                13.190183             161.385771  \n",
+       "4                 170.879052            8.5  \n",
-       "...                    ...                    ...  \n",
+       "...                      ...            ...  \n",
-       "96091             0.142913               9.619467  \n",
+       "96091              10.185318           17.0  \n",
-       "96092             0.174351              10.706885  \n",
+       "96092              11.336701           17.0  \n",
-       "96093             0.000000               0.000000  \n",
+       "96093               0.000000            NaN  \n",
-       "96094             0.143697              11.413840  \n",
+       "96094              12.085242           17.0  \n",
-       "96095             0.000000               0.000000  \n",
+       "96095               0.000000            NaN  \n",
       "\n",
-       "[96096 rows x 26 columns]"
+       "[96096 rows x 27 columns]"
      ]
     },
-     "execution_count": 45,
+     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "X_test_segment = project_tickets_CA (X_test_segment, \"nb_tickets\", \"total_amount\", \"score_adjusted\", duration_ref=1.5, duration_projection=1)\n",
+    "X_test_segment = project_tickets_CA (X_test_segment, \"nb_purchases\", \"nb_tickets\", \"total_amount\", \"score_adjusted\", \n",
    "                                     duration_ref=17, duration_projection=12)\n",
    "X_test_segment"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 82,
   "id": "cb66a8ea-65f7-460f-b3fc-ba76a3b91faa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "quartile\n",
       "1    16.581057\n",
       "2    15.840818\n",
       "3    14.888091\n",
       "4     4.830480\n",
       "Name: pace_purchase, dtype: float64"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_segment.groupby(\"quartile\")[\"pace_purchase\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "f58f9151-2f91-45df-abb7-1ddcf0652adc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# generalization with a function\n",
    "\n",
-    "def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount,\n",
+    "def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount, pace_purchase,\n",
    "                       duration_ref=1.5, duration_projection=1) :\n",
    "    \n",
    "    # compute nb tickets estimated and total amount expected\n",
@ -2545,13 +2544,16 @@
    "    \n",
    "    df_expected_CA[\"revenue_recovered_perct\"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \\\n",
    "    df.groupby(segment)[total_amount].sum().values\n",
    "\n",
    "    df_drop_null_pace = df.dropna(subset=[pace_purchase])\n",
    "    df_expected_CA[\"pace_purchase\"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values\n",
    "    \n",
    "    return df_expected_CA"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 119,
   "id": "c8df6c80-43e8-4f00-9cd3-eb9022744313",
   "metadata": {},
   "outputs": [
@ -2582,6 +2584,7 @@
       "      <th>nb_tickets_expected</th>\n",
       "      <th>total_amount_expected</th>\n",
       "      <th>revenue_recovered_perct</th>\n",
       "      <th>pace_purchase</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
@ -2590,36 +2593,40 @@
       "      <td>1</td>\n",
       "      <td>37410</td>\n",
       "      <td>38.93</td>\n",
-       "      <td>84.76</td>\n",
+       "      <td>89.75</td>\n",
-       "      <td>1867.19</td>\n",
+       "      <td>1977.02</td>\n",
-       "      <td>4.38</td>\n",
+       "      <td>4.64</td>\n",
       "      <td>16.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>29517</td>\n",
       "      <td>30.72</td>\n",
-       "      <td>2899.29</td>\n",
+       "      <td>3069.83</td>\n",
-       "      <td>74461.02</td>\n",
+       "      <td>78841.08</td>\n",
-       "      <td>9.85</td>\n",
+       "      <td>10.43</td>\n",
       "      <td>15.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>20137</td>\n",
       "      <td>20.96</td>\n",
-       "      <td>10876.79</td>\n",
+       "      <td>11516.60</td>\n",
-       "      <td>344286.66</td>\n",
+       "      <td>364538.82</td>\n",
-       "      <td>22.84</td>\n",
+       "      <td>24.19</td>\n",
       "      <td>14.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>9032</td>\n",
       "      <td>9.40</td>\n",
-       "      <td>215194.83</td>\n",
+       "      <td>227853.35</td>\n",
-       "      <td>9899417.81</td>\n",
+       "      <td>10481736.51</td>\n",
-       "      <td>90.11</td>\n",
+       "      <td>95.41</td>\n",
       "      <td>4.83</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
@ -2627,43 +2634,44 @@
      ],
      "text/plain": [
       "  quartile   size  size_perct  nb_tickets_expected  total_amount_expected  \\\n",
-       "0        1  37410       38.93                84.76                1867.19   \n",
+       "0        1  37410       38.93                89.75                1977.02   \n",
-       "1        2  29517       30.72              2899.29               74461.02   \n",
+       "1        2  29517       30.72              3069.83               78841.08   \n",
-       "2        3  20137       20.96             10876.79              344286.66   \n",
+       "2        3  20137       20.96             11516.60              364538.82   \n",
-       "3        4   9032        9.40            215194.83             9899417.81   \n",
+       "3        4   9032        9.40            227853.35            10481736.51   \n",
       "\n",
-       "   revenue_recovered_perct  \n",
+       "   revenue_recovered_perct  pace_purchase  \n",
-       "0                     4.38  \n",
+       "0                     4.64          16.58  \n",
-       "1                     9.85  \n",
+       "1                    10.43          15.84  \n",
-       "2                    22.84  \n",
+       "2                    24.19          14.89  \n",
-       "3                    90.11  "
+       "3                    95.41           4.83  "
      ]
     },
-     "execution_count": 47,
+     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", nb_tickets_expected=\"nb_tickets_expected\", \n",
+    "X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", \n",
-    "                    total_amount_expected=\"total_amount_expected\", total_amount=\"total_amount\"),2)\n",
+    "                    nb_tickets_expected=\"nb_tickets_expected\", total_amount_expected=\"total_amount_expected\", \n",
    "                    total_amount=\"total_amount\", pace_purchase=\"pace_purchase\"),2)\n",
    "\n",
    "X_test_expected_CA"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 120,
   "id": "ac706ed7-defa-4df1-82e1-06f12fc1b6ad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "'\\\\begin{tabular}{lrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 84.760000 & 1867.190000 & 4.380000 \\\\\\\\\\n2 & 29517 & 30.720000 & 2899.290000 & 74461.020000 & 9.850000 \\\\\\\\\\n3 & 20137 & 20.960000 & 10876.790000 & 344286.660000 & 22.840000 \\\\\\\\\\n4 & 9032 & 9.400000 & 215194.830000 & 9899417.810000 & 90.110000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
+       "'\\\\begin{tabular}{lrrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) & pace purchase \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 89.750000 & 1977.020000 & 4.640000 & 16.580000 \\\\\\\\\\n2 & 29517 & 30.720000 & 3069.830000 & 78841.080000 & 10.430000 & 15.840000 \\\\\\\\\\n3 & 20137 & 20.960000 & 11516.600000 & 364538.820000 & 24.190000 & 14.890000 \\\\\\\\\\n4 & 9032 & 9.400000 & 227853.350000 & 10481736.510000 & 95.410000 & 4.830000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
      ]
     },
-     "execution_count": 48,
+     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -2677,14 +2685,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 122,
   "id": "771da0cf-c49f-4e7e-b52f-ebcfb0fb2df3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# export summary table to the MinIO storage\n",
    "\n",
-    "file_name = \"table_expected_CA\"\n",
+    "file_name = \"table_expected_CA_\"\n",
    "FILE_PATH_OUT_S3 = PATH + file_name +  type_of_activity + \".csv\"\n",
    "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
    "    X_test_expected_CA.to_csv(file_out, index = False)"
--- a/Sport/Modelization/segment_analysis_sport_0_6.ipynb
+++ b/Sport/Modelization/segment_analysis_sport_0_6.ipynb
--- a/utils_CA_segment.py
+++ b/utils_CA_segment.py
@ -85,17 +85,18 @@ def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
    # plt.show()
-def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) : 
+def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) : 
    """
    Project ticket counts and total amount for a given duration and adjust based on a score.
    Args:
    - df (DataFrame): DataFrame containing ticket data.
    - nb_purchases (str): Name of the column in df representing the number of purchases.
    - nb_tickets (str): Name of the column in df representing the number of tickets.
    - total_amount (str): Name of the column in df representing the total amount.
    - score_adjusted (str): Name of the column in df representing the adjusted score.
-    - duration_ref (int or float): Reference duration for the project.
+    - duration_ref (int or float): Duration of the reference period used to construct the variables X.
-    - duration_projection (int or float): Duration for which the projection is made.
+    - duration_projection (int or float): Duration of the period over which sales / revenue are projected.
    Returns:
    DataFrame: DataFrame with projected ticket counts and total amount adjusted based on the score.
@ -112,6 +113,8 @@ def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_r
    df_output.loc[:,"nb_tickets_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"nb_tickets_projected"]
    df_output.loc[:,"total_amount_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"total_amount_projected"]
    df_output.loc[:,"pace_purchase"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)
    return df_output
@ -144,5 +147,8 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
    df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
    df.groupby(segment)[total_amount].sum().values
    df_drop_null_pace = df.dropna(subset=[pace_purchase])
    df_expected_CA["pace_purchase"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values
    return df_expected_CA