completed segment mp analysis sport

This commit is contained in:
Thomas PIQUE 2024-03-26 11:20:03 +00:00
parent dbd87dadd9
commit 2165c7c16e
3 changed files with 1893 additions and 152 deletions

View File

@ -2049,7 +2049,7 @@
"source": [
"# comparison between score and adjusted score - export csv associated\n",
"\n",
"file_name = \"table_adjusted_score\"\n",
"file_name = \"table_adjusted_score_\"\n",
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n",
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
" X_test_table_adjusted_scores.to_csv(file_out, index = False)"
@ -2057,12 +2057,12 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 77,
"id": "a974589f-7952-4db2-bebf-7b69c6b09372",
"metadata": {},
"outputs": [],
"source": [
"def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
"def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
" \n",
" duration_ratio = duration_ref/duration_projection\n",
"\n",
@ -2074,45 +2074,17 @@
" df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
" df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n",
"\n",
" df_output.loc[:,\"pace_purchase\"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)\n",
" \n",
" return df_output\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 79,
"id": "dd8a52e1-d06e-4790-8687-8e58e3e6b84e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_521/3689439025.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_output.loc[:,\"nb_tickets_projected\"] = df_output.loc[:,nb_tickets] / duration_ratio\n",
"/tmp/ipykernel_521/3689439025.py:8: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_output.loc[:,\"total_amount_projected\"] = df_output.loc[:,total_amount] / duration_ratio\n",
"/tmp/ipykernel_521/3689439025.py:10: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
"/tmp/ipykernel_521/3689439025.py:11: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n"
]
},
{
"data": {
"text/html": [
@ -2145,7 +2117,6 @@
" <th>nb_tickets_internet</th>\n",
" <th>fidelity</th>\n",
" <th>...</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
@ -2155,6 +2126,7 @@
" <th>total_amount_projected</th>\n",
" <th>nb_tickets_expected</th>\n",
" <th>total_amount_expected</th>\n",
" <th>pace_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
@ -2172,15 +2144,15 @@
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.657671</td>\n",
" <td>3</td>\n",
" <td>0.240397</td>\n",
" <td>2.666667</td>\n",
" <td>66.666667</td>\n",
" <td>0.641059</td>\n",
" <td>16.026472</td>\n",
" <td>2.823529</td>\n",
" <td>70.588235</td>\n",
" <td>0.678768</td>\n",
" <td>16.969205</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
@ -2195,16 +2167,16 @@
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.266538</td>\n",
" <td>2</td>\n",
" <td>0.056482</td>\n",
" <td>0.666667</td>\n",
" <td>36.666667</td>\n",
" <td>0.037655</td>\n",
" <td>2.071006</td>\n",
" <td>0.705882</td>\n",
" <td>38.823529</td>\n",
" <td>0.039870</td>\n",
" <td>2.192830</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
@ -2221,14 +2193,14 @@
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.214668</td>\n",
" <td>1</td>\n",
" <td>0.043089</td>\n",
" <td>11.333333</td>\n",
" <td>53.333333</td>\n",
" <td>0.488340</td>\n",
" <td>2.298068</td>\n",
" <td>12.000000</td>\n",
" <td>56.470588</td>\n",
" <td>0.517065</td>\n",
" <td>2.433249</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
@ -2244,15 +2216,15 @@
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.657770</td>\n",
" <td>3</td>\n",
" <td>0.240478</td>\n",
" <td>2.666667</td>\n",
" <td>80.000000</td>\n",
" <td>0.641273</td>\n",
" <td>19.238202</td>\n",
" <td>2.823529</td>\n",
" <td>84.705882</td>\n",
" <td>0.678995</td>\n",
" <td>20.369861</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
@ -2267,16 +2239,16 @@
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.894173</td>\n",
" <td>4</td>\n",
" <td>0.581920</td>\n",
" <td>22.666667</td>\n",
" <td>277.333333</td>\n",
" <td>13.190183</td>\n",
" <td>161.385771</td>\n",
" <td>24.000000</td>\n",
" <td>293.647059</td>\n",
" <td>13.966076</td>\n",
" <td>170.879052</td>\n",
" <td>8.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
@ -2315,16 +2287,16 @@
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.623551</td>\n",
" <td>3</td>\n",
" <td>0.214369</td>\n",
" <td>0.666667</td>\n",
" <td>44.873333</td>\n",
" <td>0.142913</td>\n",
" <td>9.619467</td>\n",
" <td>0.705882</td>\n",
" <td>47.512941</td>\n",
" <td>0.151320</td>\n",
" <td>10.185318</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
@ -2339,16 +2311,16 @@
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.682521</td>\n",
" <td>3</td>\n",
" <td>0.261526</td>\n",
" <td>0.666667</td>\n",
" <td>40.940000</td>\n",
" <td>0.174351</td>\n",
" <td>10.706885</td>\n",
" <td>0.705882</td>\n",
" <td>43.348235</td>\n",
" <td>0.184607</td>\n",
" <td>11.336701</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
@ -2363,7 +2335,6 @@
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.117192</td>\n",
@ -2373,6 +2344,7 @@
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
@ -2387,16 +2359,16 @@
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.625185</td>\n",
" <td>3</td>\n",
" <td>0.215545</td>\n",
" <td>0.666667</td>\n",
" <td>52.953333</td>\n",
" <td>0.143697</td>\n",
" <td>11.413840</td>\n",
" <td>0.705882</td>\n",
" <td>56.068235</td>\n",
" <td>0.152150</td>\n",
" <td>12.085242</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
@ -2411,7 +2383,6 @@
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.319585</td>\n",
@ -2421,10 +2392,11 @@
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 26 columns</p>\n",
"<p>96096 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
@ -2467,68 +2439,95 @@
"96094 0.000000 1.0 1 ... \n",
"96095 -1.000000 0.0 2 ... \n",
"\n",
" nb_campaigns_opened has_purchased has_purchased_estim score \\\n",
"0 0.0 0.0 1.0 0.657671 \n",
"1 0.0 1.0 0.0 0.266538 \n",
"2 0.0 0.0 0.0 0.214668 \n",
"3 0.0 0.0 1.0 0.657770 \n",
"4 0.0 1.0 1.0 0.894173 \n",
"... ... ... ... ... \n",
"96091 5.0 1.0 1.0 0.623551 \n",
"96092 9.0 0.0 1.0 0.682521 \n",
"96093 3.0 0.0 0.0 0.117192 \n",
"96094 4.0 0.0 1.0 0.625185 \n",
"96095 4.0 0.0 0.0 0.319585 \n",
" has_purchased has_purchased_estim score quartile score_adjusted \\\n",
"0 0.0 1.0 0.657671 3 0.240397 \n",
"1 1.0 0.0 0.266538 2 0.056482 \n",
"2 0.0 0.0 0.214668 1 0.043089 \n",
"3 0.0 1.0 0.657770 3 0.240478 \n",
"4 1.0 1.0 0.894173 4 0.581920 \n",
"... ... ... ... ... ... \n",
"96091 1.0 1.0 0.623551 3 0.214369 \n",
"96092 0.0 1.0 0.682521 3 0.261526 \n",
"96093 0.0 0.0 0.117192 1 0.021400 \n",
"96094 0.0 1.0 0.625185 3 0.215545 \n",
"96095 0.0 0.0 0.319585 2 0.071817 \n",
"\n",
" quartile score_adjusted nb_tickets_projected total_amount_projected \\\n",
"0 3 0.240397 2.666667 66.666667 \n",
"1 2 0.056482 0.666667 36.666667 \n",
"2 1 0.043089 11.333333 53.333333 \n",
"3 3 0.240478 2.666667 80.000000 \n",
"4 4 0.581920 22.666667 277.333333 \n",
"... ... ... ... ... \n",
"96091 3 0.214369 0.666667 44.873333 \n",
"96092 3 0.261526 0.666667 40.940000 \n",
"96093 1 0.021400 0.000000 0.000000 \n",
"96094 3 0.215545 0.666667 52.953333 \n",
"96095 2 0.071817 0.000000 0.000000 \n",
" nb_tickets_projected total_amount_projected nb_tickets_expected \\\n",
"0 2.823529 70.588235 0.678768 \n",
"1 0.705882 38.823529 0.039870 \n",
"2 12.000000 56.470588 0.517065 \n",
"3 2.823529 84.705882 0.678995 \n",
"4 24.000000 293.647059 13.966076 \n",
"... ... ... ... \n",
"96091 0.705882 47.512941 0.151320 \n",
"96092 0.705882 43.348235 0.184607 \n",
"96093 0.000000 0.000000 0.000000 \n",
"96094 0.705882 56.068235 0.152150 \n",
"96095 0.000000 0.000000 0.000000 \n",
"\n",
" nb_tickets_expected total_amount_expected \n",
"0 0.641059 16.026472 \n",
"1 0.037655 2.071006 \n",
"2 0.488340 2.298068 \n",
"3 0.641273 19.238202 \n",
"4 13.190183 161.385771 \n",
"... ... ... \n",
"96091 0.142913 9.619467 \n",
"96092 0.174351 10.706885 \n",
"96093 0.000000 0.000000 \n",
"96094 0.143697 11.413840 \n",
"96095 0.000000 0.000000 \n",
" total_amount_expected pace_purchase \n",
"0 16.969205 17.0 \n",
"1 2.192830 17.0 \n",
"2 2.433249 17.0 \n",
"3 20.369861 17.0 \n",
"4 170.879052 8.5 \n",
"... ... ... \n",
"96091 10.185318 17.0 \n",
"96092 11.336701 17.0 \n",
"96093 0.000000 NaN \n",
"96094 12.085242 17.0 \n",
"96095 0.000000 NaN \n",
"\n",
"[96096 rows x 26 columns]"
"[96096 rows x 27 columns]"
]
},
"execution_count": 45,
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment = project_tickets_CA (X_test_segment, \"nb_tickets\", \"total_amount\", \"score_adjusted\", duration_ref=1.5, duration_projection=1)\n",
"X_test_segment = project_tickets_CA (X_test_segment, \"nb_purchases\", \"nb_tickets\", \"total_amount\", \"score_adjusted\", \n",
" duration_ref=17, duration_projection=12)\n",
"X_test_segment"
]
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 82,
"id": "cb66a8ea-65f7-460f-b3fc-ba76a3b91faa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"quartile\n",
"1 16.581057\n",
"2 15.840818\n",
"3 14.888091\n",
"4 4.830480\n",
"Name: pace_purchase, dtype: float64"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment.groupby(\"quartile\")[\"pace_purchase\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "f58f9151-2f91-45df-abb7-1ddcf0652adc",
"metadata": {},
"outputs": [],
"source": [
"# generalization with a function\n",
"\n",
"def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount,\n",
"def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount, pace_purchase,\n",
" duration_ref=1.5, duration_projection=1) :\n",
" \n",
" # compute nb tickets estimated and total amount expected\n",
@ -2545,13 +2544,16 @@
" \n",
" df_expected_CA[\"revenue_recovered_perct\"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \\\n",
" df.groupby(segment)[total_amount].sum().values\n",
"\n",
" df_drop_null_pace = df.dropna(subset=[pace_purchase])\n",
" df_expected_CA[\"pace_purchase\"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values\n",
" \n",
" return df_expected_CA"
]
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 119,
"id": "c8df6c80-43e8-4f00-9cd3-eb9022744313",
"metadata": {},
"outputs": [
@ -2582,6 +2584,7 @@
" <th>nb_tickets_expected</th>\n",
" <th>total_amount_expected</th>\n",
" <th>revenue_recovered_perct</th>\n",
" <th>pace_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
@ -2590,36 +2593,40 @@
" <td>1</td>\n",
" <td>37410</td>\n",
" <td>38.93</td>\n",
" <td>84.76</td>\n",
" <td>1867.19</td>\n",
" <td>4.38</td>\n",
" <td>89.75</td>\n",
" <td>1977.02</td>\n",
" <td>4.64</td>\n",
" <td>16.58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>29517</td>\n",
" <td>30.72</td>\n",
" <td>2899.29</td>\n",
" <td>74461.02</td>\n",
" <td>9.85</td>\n",
" <td>3069.83</td>\n",
" <td>78841.08</td>\n",
" <td>10.43</td>\n",
" <td>15.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>20137</td>\n",
" <td>20.96</td>\n",
" <td>10876.79</td>\n",
" <td>344286.66</td>\n",
" <td>22.84</td>\n",
" <td>11516.60</td>\n",
" <td>364538.82</td>\n",
" <td>24.19</td>\n",
" <td>14.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>9032</td>\n",
" <td>9.40</td>\n",
" <td>215194.83</td>\n",
" <td>9899417.81</td>\n",
" <td>90.11</td>\n",
" <td>227853.35</td>\n",
" <td>10481736.51</td>\n",
" <td>95.41</td>\n",
" <td>4.83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -2627,43 +2634,44 @@
],
"text/plain": [
" quartile size size_perct nb_tickets_expected total_amount_expected \\\n",
"0 1 37410 38.93 84.76 1867.19 \n",
"1 2 29517 30.72 2899.29 74461.02 \n",
"2 3 20137 20.96 10876.79 344286.66 \n",
"3 4 9032 9.40 215194.83 9899417.81 \n",
"0 1 37410 38.93 89.75 1977.02 \n",
"1 2 29517 30.72 3069.83 78841.08 \n",
"2 3 20137 20.96 11516.60 364538.82 \n",
"3 4 9032 9.40 227853.35 10481736.51 \n",
"\n",
" revenue_recovered_perct \n",
"0 4.38 \n",
"1 9.85 \n",
"2 22.84 \n",
"3 90.11 "
" revenue_recovered_perct pace_purchase \n",
"0 4.64 16.58 \n",
"1 10.43 15.84 \n",
"2 24.19 14.89 \n",
"3 95.41 4.83 "
]
},
"execution_count": 47,
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", nb_tickets_expected=\"nb_tickets_expected\", \n",
" total_amount_expected=\"total_amount_expected\", total_amount=\"total_amount\"),2)\n",
"X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", \n",
" nb_tickets_expected=\"nb_tickets_expected\", total_amount_expected=\"total_amount_expected\", \n",
" total_amount=\"total_amount\", pace_purchase=\"pace_purchase\"),2)\n",
"\n",
"X_test_expected_CA"
]
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 120,
"id": "ac706ed7-defa-4df1-82e1-06f12fc1b6ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\\\begin{tabular}{lrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 84.760000 & 1867.190000 & 4.380000 \\\\\\\\\\n2 & 29517 & 30.720000 & 2899.290000 & 74461.020000 & 9.850000 \\\\\\\\\\n3 & 20137 & 20.960000 & 10876.790000 & 344286.660000 & 22.840000 \\\\\\\\\\n4 & 9032 & 9.400000 & 215194.830000 & 9899417.810000 & 90.110000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
"'\\\\begin{tabular}{lrrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) & pace purchase \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 89.750000 & 1977.020000 & 4.640000 & 16.580000 \\\\\\\\\\n2 & 29517 & 30.720000 & 3069.830000 & 78841.080000 & 10.430000 & 15.840000 \\\\\\\\\\n3 & 20137 & 20.960000 & 11516.600000 & 364538.820000 & 24.190000 & 14.890000 \\\\\\\\\\n4 & 9032 & 9.400000 & 227853.350000 & 10481736.510000 & 95.410000 & 4.830000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
]
},
"execution_count": 48,
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
@ -2677,14 +2685,14 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 122,
"id": "771da0cf-c49f-4e7e-b52f-ebcfb0fb2df3",
"metadata": {},
"outputs": [],
"source": [
"# export summary table to the MinIO storage\n",
"\n",
"file_name = \"table_expected_CA\"\n",
"file_name = \"table_expected_CA_\"\n",
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n",
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
" X_test_expected_CA.to_csv(file_out, index = False)"

File diff suppressed because one or more lines are too long

View File

@ -85,17 +85,18 @@ def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
# plt.show()
def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
"""
Project ticket counts and total amount for a given duration and adjust based on a score.
Args:
- df (DataFrame): DataFrame containing ticket data.
- nb_purchases (str): Name of the column in df representing the number of purchases.
- nb_tickets (str): Name of the column in df representing the number of tickets.
- total_amount (str): Name of the column in df representing the total amount.
- score_adjusted (str): Name of the column in df representing the adjusted score.
- duration_ref (int or float): Reference duration for the project.
- duration_projection (int or float): Duration for which the projection is made.
- duration_ref (int or float): Duration of the reference period used to construct the variables X (same unit as duration_projection, since the two are divided to form a ratio).
- duration_projection (int or float): Duration of the period over which sales / revenue are projected.
Returns:
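DataFrame: DataFrame with projected ticket counts and total amount adjusted based on the score, plus a "pace_purchase" column (duration_ref divided by the number of purchases, set to NaN where that division yields infinity).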
@ -112,6 +113,8 @@ def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_r
df_output.loc[:,"nb_tickets_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"nb_tickets_projected"]
df_output.loc[:,"total_amount_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"total_amount_projected"]
df_output.loc[:,"pace_purchase"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)
return df_output
@ -144,5 +147,8 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
df.groupby(segment)[total_amount].sum().values
df_drop_null_pace = df.dropna(subset=[pace_purchase])
df_expected_CA["pace_purchase"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values
return df_expected_CA