completed segment mp analysis sport
This commit is contained in:
parent
dbd87dadd9
commit
2165c7c16e
|
@ -2049,7 +2049,7 @@
|
|||
"source": [
|
||||
"# comparison between score and adjusted score - export csv associated\n",
|
||||
"\n",
|
||||
"file_name = \"table_adjusted_score\"\n",
|
||||
"file_name = \"table_adjusted_score_\"\n",
|
||||
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n",
|
||||
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
|
||||
" X_test_table_adjusted_scores.to_csv(file_out, index = False)"
|
||||
|
@ -2057,12 +2057,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 77,
|
||||
"id": "a974589f-7952-4db2-bebf-7b69c6b09372",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
|
||||
"def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
|
||||
" \n",
|
||||
" duration_ratio = duration_ref/duration_projection\n",
|
||||
"\n",
|
||||
|
@ -2074,45 +2074,17 @@
|
|||
" df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
|
||||
" df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n",
|
||||
"\n",
|
||||
" df_output.loc[:,\"pace_purchase\"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)\n",
|
||||
" \n",
|
||||
" return df_output\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 79,
|
||||
"id": "dd8a52e1-d06e-4790-8687-8e58e3e6b84e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_521/3689439025.py:7: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" df_output.loc[:,\"nb_tickets_projected\"] = df_output.loc[:,nb_tickets] / duration_ratio\n",
|
||||
"/tmp/ipykernel_521/3689439025.py:8: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" df_output.loc[:,\"total_amount_projected\"] = df_output.loc[:,total_amount] / duration_ratio\n",
|
||||
"/tmp/ipykernel_521/3689439025.py:10: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
|
||||
"/tmp/ipykernel_521/3689439025.py:11: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
|
@ -2145,7 +2117,6 @@
|
|||
" <th>nb_tickets_internet</th>\n",
|
||||
" <th>fidelity</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>nb_campaigns_opened</th>\n",
|
||||
" <th>has_purchased</th>\n",
|
||||
" <th>has_purchased_estim</th>\n",
|
||||
" <th>score</th>\n",
|
||||
|
@ -2155,6 +2126,7 @@
|
|||
" <th>total_amount_projected</th>\n",
|
||||
" <th>nb_tickets_expected</th>\n",
|
||||
" <th>total_amount_expected</th>\n",
|
||||
" <th>pace_purchase</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
|
@ -2172,15 +2144,15 @@
|
|||
" <td>1</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.657671</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0.240397</td>\n",
|
||||
" <td>2.666667</td>\n",
|
||||
" <td>66.666667</td>\n",
|
||||
" <td>0.641059</td>\n",
|
||||
" <td>16.026472</td>\n",
|
||||
" <td>2.823529</td>\n",
|
||||
" <td>70.588235</td>\n",
|
||||
" <td>0.678768</td>\n",
|
||||
" <td>16.969205</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
|
@ -2195,16 +2167,16 @@
|
|||
" <td>0.0</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.266538</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>0.056482</td>\n",
|
||||
" <td>0.666667</td>\n",
|
||||
" <td>36.666667</td>\n",
|
||||
" <td>0.037655</td>\n",
|
||||
" <td>2.071006</td>\n",
|
||||
" <td>0.705882</td>\n",
|
||||
" <td>38.823529</td>\n",
|
||||
" <td>0.039870</td>\n",
|
||||
" <td>2.192830</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
|
@ -2221,14 +2193,14 @@
|
|||
" <td>...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.214668</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0.043089</td>\n",
|
||||
" <td>11.333333</td>\n",
|
||||
" <td>53.333333</td>\n",
|
||||
" <td>0.488340</td>\n",
|
||||
" <td>2.298068</td>\n",
|
||||
" <td>12.000000</td>\n",
|
||||
" <td>56.470588</td>\n",
|
||||
" <td>0.517065</td>\n",
|
||||
" <td>2.433249</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
|
@ -2244,15 +2216,15 @@
|
|||
" <td>1</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.657770</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0.240478</td>\n",
|
||||
" <td>2.666667</td>\n",
|
||||
" <td>80.000000</td>\n",
|
||||
" <td>0.641273</td>\n",
|
||||
" <td>19.238202</td>\n",
|
||||
" <td>2.823529</td>\n",
|
||||
" <td>84.705882</td>\n",
|
||||
" <td>0.678995</td>\n",
|
||||
" <td>20.369861</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
|
@ -2267,16 +2239,16 @@
|
|||
" <td>0.0</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.894173</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>0.581920</td>\n",
|
||||
" <td>22.666667</td>\n",
|
||||
" <td>277.333333</td>\n",
|
||||
" <td>13.190183</td>\n",
|
||||
" <td>161.385771</td>\n",
|
||||
" <td>24.000000</td>\n",
|
||||
" <td>293.647059</td>\n",
|
||||
" <td>13.966076</td>\n",
|
||||
" <td>170.879052</td>\n",
|
||||
" <td>8.5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
|
@ -2315,16 +2287,16 @@
|
|||
" <td>1.0</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.623551</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0.214369</td>\n",
|
||||
" <td>0.666667</td>\n",
|
||||
" <td>44.873333</td>\n",
|
||||
" <td>0.142913</td>\n",
|
||||
" <td>9.619467</td>\n",
|
||||
" <td>0.705882</td>\n",
|
||||
" <td>47.512941</td>\n",
|
||||
" <td>0.151320</td>\n",
|
||||
" <td>10.185318</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>96092</th>\n",
|
||||
|
@ -2339,16 +2311,16 @@
|
|||
" <td>1.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.682521</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0.261526</td>\n",
|
||||
" <td>0.666667</td>\n",
|
||||
" <td>40.940000</td>\n",
|
||||
" <td>0.174351</td>\n",
|
||||
" <td>10.706885</td>\n",
|
||||
" <td>0.705882</td>\n",
|
||||
" <td>43.348235</td>\n",
|
||||
" <td>0.184607</td>\n",
|
||||
" <td>11.336701</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>96093</th>\n",
|
||||
|
@ -2363,7 +2335,6 @@
|
|||
" <td>0.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.117192</td>\n",
|
||||
|
@ -2373,6 +2344,7 @@
|
|||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>96094</th>\n",
|
||||
|
@ -2387,16 +2359,16 @@
|
|||
" <td>1.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.625185</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0.215545</td>\n",
|
||||
" <td>0.666667</td>\n",
|
||||
" <td>52.953333</td>\n",
|
||||
" <td>0.143697</td>\n",
|
||||
" <td>11.413840</td>\n",
|
||||
" <td>0.705882</td>\n",
|
||||
" <td>56.068235</td>\n",
|
||||
" <td>0.152150</td>\n",
|
||||
" <td>12.085242</td>\n",
|
||||
" <td>17.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>96095</th>\n",
|
||||
|
@ -2411,7 +2383,6 @@
|
|||
" <td>0.0</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.319585</td>\n",
|
||||
|
@ -2421,10 +2392,11 @@
|
|||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>96096 rows × 26 columns</p>\n",
|
||||
"<p>96096 rows × 27 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
|
@ -2467,68 +2439,95 @@
|
|||
"96094 0.000000 1.0 1 ... \n",
|
||||
"96095 -1.000000 0.0 2 ... \n",
|
||||
"\n",
|
||||
" nb_campaigns_opened has_purchased has_purchased_estim score \\\n",
|
||||
"0 0.0 0.0 1.0 0.657671 \n",
|
||||
"1 0.0 1.0 0.0 0.266538 \n",
|
||||
"2 0.0 0.0 0.0 0.214668 \n",
|
||||
"3 0.0 0.0 1.0 0.657770 \n",
|
||||
"4 0.0 1.0 1.0 0.894173 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"96091 5.0 1.0 1.0 0.623551 \n",
|
||||
"96092 9.0 0.0 1.0 0.682521 \n",
|
||||
"96093 3.0 0.0 0.0 0.117192 \n",
|
||||
"96094 4.0 0.0 1.0 0.625185 \n",
|
||||
"96095 4.0 0.0 0.0 0.319585 \n",
|
||||
" has_purchased has_purchased_estim score quartile score_adjusted \\\n",
|
||||
"0 0.0 1.0 0.657671 3 0.240397 \n",
|
||||
"1 1.0 0.0 0.266538 2 0.056482 \n",
|
||||
"2 0.0 0.0 0.214668 1 0.043089 \n",
|
||||
"3 0.0 1.0 0.657770 3 0.240478 \n",
|
||||
"4 1.0 1.0 0.894173 4 0.581920 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"96091 1.0 1.0 0.623551 3 0.214369 \n",
|
||||
"96092 0.0 1.0 0.682521 3 0.261526 \n",
|
||||
"96093 0.0 0.0 0.117192 1 0.021400 \n",
|
||||
"96094 0.0 1.0 0.625185 3 0.215545 \n",
|
||||
"96095 0.0 0.0 0.319585 2 0.071817 \n",
|
||||
"\n",
|
||||
" quartile score_adjusted nb_tickets_projected total_amount_projected \\\n",
|
||||
"0 3 0.240397 2.666667 66.666667 \n",
|
||||
"1 2 0.056482 0.666667 36.666667 \n",
|
||||
"2 1 0.043089 11.333333 53.333333 \n",
|
||||
"3 3 0.240478 2.666667 80.000000 \n",
|
||||
"4 4 0.581920 22.666667 277.333333 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"96091 3 0.214369 0.666667 44.873333 \n",
|
||||
"96092 3 0.261526 0.666667 40.940000 \n",
|
||||
"96093 1 0.021400 0.000000 0.000000 \n",
|
||||
"96094 3 0.215545 0.666667 52.953333 \n",
|
||||
"96095 2 0.071817 0.000000 0.000000 \n",
|
||||
" nb_tickets_projected total_amount_projected nb_tickets_expected \\\n",
|
||||
"0 2.823529 70.588235 0.678768 \n",
|
||||
"1 0.705882 38.823529 0.039870 \n",
|
||||
"2 12.000000 56.470588 0.517065 \n",
|
||||
"3 2.823529 84.705882 0.678995 \n",
|
||||
"4 24.000000 293.647059 13.966076 \n",
|
||||
"... ... ... ... \n",
|
||||
"96091 0.705882 47.512941 0.151320 \n",
|
||||
"96092 0.705882 43.348235 0.184607 \n",
|
||||
"96093 0.000000 0.000000 0.000000 \n",
|
||||
"96094 0.705882 56.068235 0.152150 \n",
|
||||
"96095 0.000000 0.000000 0.000000 \n",
|
||||
"\n",
|
||||
" nb_tickets_expected total_amount_expected \n",
|
||||
"0 0.641059 16.026472 \n",
|
||||
"1 0.037655 2.071006 \n",
|
||||
"2 0.488340 2.298068 \n",
|
||||
"3 0.641273 19.238202 \n",
|
||||
"4 13.190183 161.385771 \n",
|
||||
"... ... ... \n",
|
||||
"96091 0.142913 9.619467 \n",
|
||||
"96092 0.174351 10.706885 \n",
|
||||
"96093 0.000000 0.000000 \n",
|
||||
"96094 0.143697 11.413840 \n",
|
||||
"96095 0.000000 0.000000 \n",
|
||||
" total_amount_expected pace_purchase \n",
|
||||
"0 16.969205 17.0 \n",
|
||||
"1 2.192830 17.0 \n",
|
||||
"2 2.433249 17.0 \n",
|
||||
"3 20.369861 17.0 \n",
|
||||
"4 170.879052 8.5 \n",
|
||||
"... ... ... \n",
|
||||
"96091 10.185318 17.0 \n",
|
||||
"96092 11.336701 17.0 \n",
|
||||
"96093 0.000000 NaN \n",
|
||||
"96094 12.085242 17.0 \n",
|
||||
"96095 0.000000 NaN \n",
|
||||
"\n",
|
||||
"[96096 rows x 26 columns]"
|
||||
"[96096 rows x 27 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 45,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_test_segment = project_tickets_CA (X_test_segment, \"nb_tickets\", \"total_amount\", \"score_adjusted\", duration_ref=1.5, duration_projection=1)\n",
|
||||
"X_test_segment = project_tickets_CA (X_test_segment, \"nb_purchases\", \"nb_tickets\", \"total_amount\", \"score_adjusted\", \n",
|
||||
" duration_ref=17, duration_projection=12)\n",
|
||||
"X_test_segment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 82,
|
||||
"id": "cb66a8ea-65f7-460f-b3fc-ba76a3b91faa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"quartile\n",
|
||||
"1 16.581057\n",
|
||||
"2 15.840818\n",
|
||||
"3 14.888091\n",
|
||||
"4 4.830480\n",
|
||||
"Name: pace_purchase, dtype: float64"
|
||||
]
|
||||
},
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_test_segment.groupby(\"quartile\")[\"pace_purchase\"].mean()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 118,
|
||||
"id": "f58f9151-2f91-45df-abb7-1ddcf0652adc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# generalization with a function\n",
|
||||
"\n",
|
||||
"def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount,\n",
|
||||
"def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount, pace_purchase,\n",
|
||||
" duration_ref=1.5, duration_projection=1) :\n",
|
||||
" \n",
|
||||
" # compute nb tickets estimated and total amount expected\n",
|
||||
|
@ -2545,13 +2544,16 @@
|
|||
" \n",
|
||||
" df_expected_CA[\"revenue_recovered_perct\"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \\\n",
|
||||
" df.groupby(segment)[total_amount].sum().values\n",
|
||||
"\n",
|
||||
" df_drop_null_pace = df.dropna(subset=[pace_purchase])\n",
|
||||
" df_expected_CA[\"pace_purchase\"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values\n",
|
||||
" \n",
|
||||
" return df_expected_CA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 119,
|
||||
"id": "c8df6c80-43e8-4f00-9cd3-eb9022744313",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -2582,6 +2584,7 @@
|
|||
" <th>nb_tickets_expected</th>\n",
|
||||
" <th>total_amount_expected</th>\n",
|
||||
" <th>revenue_recovered_perct</th>\n",
|
||||
" <th>pace_purchase</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
|
@ -2590,36 +2593,40 @@
|
|||
" <td>1</td>\n",
|
||||
" <td>37410</td>\n",
|
||||
" <td>38.93</td>\n",
|
||||
" <td>84.76</td>\n",
|
||||
" <td>1867.19</td>\n",
|
||||
" <td>4.38</td>\n",
|
||||
" <td>89.75</td>\n",
|
||||
" <td>1977.02</td>\n",
|
||||
" <td>4.64</td>\n",
|
||||
" <td>16.58</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>29517</td>\n",
|
||||
" <td>30.72</td>\n",
|
||||
" <td>2899.29</td>\n",
|
||||
" <td>74461.02</td>\n",
|
||||
" <td>9.85</td>\n",
|
||||
" <td>3069.83</td>\n",
|
||||
" <td>78841.08</td>\n",
|
||||
" <td>10.43</td>\n",
|
||||
" <td>15.84</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>20137</td>\n",
|
||||
" <td>20.96</td>\n",
|
||||
" <td>10876.79</td>\n",
|
||||
" <td>344286.66</td>\n",
|
||||
" <td>22.84</td>\n",
|
||||
" <td>11516.60</td>\n",
|
||||
" <td>364538.82</td>\n",
|
||||
" <td>24.19</td>\n",
|
||||
" <td>14.89</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>9032</td>\n",
|
||||
" <td>9.40</td>\n",
|
||||
" <td>215194.83</td>\n",
|
||||
" <td>9899417.81</td>\n",
|
||||
" <td>90.11</td>\n",
|
||||
" <td>227853.35</td>\n",
|
||||
" <td>10481736.51</td>\n",
|
||||
" <td>95.41</td>\n",
|
||||
" <td>4.83</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
|
@ -2627,43 +2634,44 @@
|
|||
],
|
||||
"text/plain": [
|
||||
" quartile size size_perct nb_tickets_expected total_amount_expected \\\n",
|
||||
"0 1 37410 38.93 84.76 1867.19 \n",
|
||||
"1 2 29517 30.72 2899.29 74461.02 \n",
|
||||
"2 3 20137 20.96 10876.79 344286.66 \n",
|
||||
"3 4 9032 9.40 215194.83 9899417.81 \n",
|
||||
"0 1 37410 38.93 89.75 1977.02 \n",
|
||||
"1 2 29517 30.72 3069.83 78841.08 \n",
|
||||
"2 3 20137 20.96 11516.60 364538.82 \n",
|
||||
"3 4 9032 9.40 227853.35 10481736.51 \n",
|
||||
"\n",
|
||||
" revenue_recovered_perct \n",
|
||||
"0 4.38 \n",
|
||||
"1 9.85 \n",
|
||||
"2 22.84 \n",
|
||||
"3 90.11 "
|
||||
" revenue_recovered_perct pace_purchase \n",
|
||||
"0 4.64 16.58 \n",
|
||||
"1 10.43 15.84 \n",
|
||||
"2 24.19 14.89 \n",
|
||||
"3 95.41 4.83 "
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 119,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", nb_tickets_expected=\"nb_tickets_expected\", \n",
|
||||
" total_amount_expected=\"total_amount_expected\", total_amount=\"total_amount\"),2)\n",
|
||||
"X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", \n",
|
||||
" nb_tickets_expected=\"nb_tickets_expected\", total_amount_expected=\"total_amount_expected\", \n",
|
||||
" total_amount=\"total_amount\", pace_purchase=\"pace_purchase\"),2)\n",
|
||||
"\n",
|
||||
"X_test_expected_CA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 120,
|
||||
"id": "ac706ed7-defa-4df1-82e1-06f12fc1b6ad",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\\\begin{tabular}{lrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 84.760000 & 1867.190000 & 4.380000 \\\\\\\\\\n2 & 29517 & 30.720000 & 2899.290000 & 74461.020000 & 9.850000 \\\\\\\\\\n3 & 20137 & 20.960000 & 10876.790000 & 344286.660000 & 22.840000 \\\\\\\\\\n4 & 9032 & 9.400000 & 215194.830000 & 9899417.810000 & 90.110000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
|
||||
"'\\\\begin{tabular}{lrrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) & pace purchase \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 89.750000 & 1977.020000 & 4.640000 & 16.580000 \\\\\\\\\\n2 & 29517 & 30.720000 & 3069.830000 & 78841.080000 & 10.430000 & 15.840000 \\\\\\\\\\n3 & 20137 & 20.960000 & 11516.600000 & 364538.820000 & 24.190000 & 14.890000 \\\\\\\\\\n4 & 9032 & 9.400000 & 227853.350000 & 10481736.510000 & 95.410000 & 4.830000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 48,
|
||||
"execution_count": 120,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -2677,14 +2685,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 122,
|
||||
"id": "771da0cf-c49f-4e7e-b52f-ebcfb0fb2df3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# export summary table to the MinIO storage\n",
|
||||
"\n",
|
||||
"file_name = \"table_expected_CA\"\n",
|
||||
"file_name = \"table_expected_CA_\"\n",
|
||||
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n",
|
||||
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
|
||||
" X_test_expected_CA.to_csv(file_out, index = False)"
|
||||
|
|
1727
Sport/Modelization/segment_analysis_sport_0_6.ipynb
Normal file
1727
Sport/Modelization/segment_analysis_sport_0_6.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
@ -85,17 +85,18 @@ def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
|
|||
# plt.show()
|
||||
|
||||
|
||||
def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
|
||||
def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
|
||||
"""
|
||||
Project ticket counts and total amount for a given duration and adjust based on a score.
|
||||
|
||||
Args:
|
||||
- df (DataFrame): DataFrame containing ticket data.
|
||||
- nb_purchases (str) : Name of the column in df representing the number of purchases.
|
||||
- nb_tickets (str): Name of the column in df representing the number of tickets.
|
||||
- total_amount (str): Name of the column in df representing the total amount.
|
||||
- score_adjusted (str): Name of the column in df representing the adjusted score.
|
||||
- duration_ref (int or float): Reference duration for the project.
|
||||
- duration_projection (int or float): Duration for which the projection is made.
|
||||
- duration_ref (int or float): duration of the period of reference for the construction of the variables X.
|
||||
- duration_projection (int or float): Duration of the period of projection of sales / revenue.
|
||||
|
||||
Returns:
|
||||
DataFrame: DataFrame with projected ticket counts and total amount adjusted based on the score.
|
||||
|
@ -112,6 +113,8 @@ def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_r
|
|||
df_output.loc[:,"nb_tickets_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"nb_tickets_projected"]
|
||||
df_output.loc[:,"total_amount_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"total_amount_projected"]
|
||||
|
||||
df_output.loc[:,"pace_purchase"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)
|
||||
|
||||
return df_output
|
||||
|
||||
|
||||
|
@ -144,5 +147,8 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
|
|||
|
||||
df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
|
||||
df.groupby(segment)[total_amount].sum().values
|
||||
|
||||
df_drop_null_pace = df.dropna(subset=[pace_purchase])
|
||||
df_expected_CA["pace_purchase"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values
|
||||
|
||||
return df_expected_CA
|
||||
|
|
Loading…
Reference in New Issue
Block a user