fixed forecasting issues
This commit is contained in:
parent
14953b031a
commit
acf7621d9a
|
@ -69,7 +69,9 @@ save_file_s3_ca("hist_score_adjusted_", type_of_activity)
|
|||
X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index()
|
||||
X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]})
|
||||
|
||||
print("Table of scores :\n")
|
||||
print(X_test_table_adjusted_scores)
|
||||
print("\n")
|
||||
|
||||
# save table
|
||||
file_name = "table_adjusted_score_"
|
||||
|
@ -84,14 +86,24 @@ X_test_segment = project_tickets_CA (X_test_segment, "nb_purchases", "nb_tickets
|
|||
|
||||
|
||||
### 3. table summarizing projections (nb tickets, revenue)
|
||||
"""
|
||||
X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment="quartile",
|
||||
nb_tickets_expected="nb_tickets_expected", total_amount_expected="total_amount_expected",
|
||||
total_amount="total_amount", pace_purchase="pace_purchase"),2)
|
||||
"""
|
||||
|
||||
X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment="quartile",
|
||||
nb_tickets_expected="nb_tickets_expected", total_amount_expected="total_amount_expected",
|
||||
total_amount="total_amount_corrected", pace_purchase="pace_purchase"),2)
|
||||
|
||||
# rename columns
|
||||
mapping_dict = {col: col.replace("perct", "(%)").replace("_", " ") for col in X_test_expected_CA.columns}
|
||||
X_test_expected_CA = X_test_expected_CA.rename(columns=mapping_dict)
|
||||
|
||||
print("Summary of forecast :\n")
|
||||
print(X_test_expected_CA)
|
||||
print("\n")
|
||||
|
||||
# save table
|
||||
file_name = "table_expected_CA_"
|
||||
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".csv"
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -167,7 +167,8 @@ def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
|
|||
# plt.show()
|
||||
|
||||
|
||||
def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
|
||||
def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
|
||||
|
||||
"""
|
||||
Project ticket counts and total amount for a given duration and adjust based on a score.
|
||||
|
||||
|
@ -184,13 +185,38 @@ def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjust
|
|||
DataFrame: DataFrame with projected ticket counts and total amount adjusted based on the score.
|
||||
duration_ratio = duration_ref/duration_projection
|
||||
"""
|
||||
|
||||
|
||||
duration_ratio = duration_ref/duration_projection
|
||||
|
||||
df_output = df
|
||||
|
||||
# project number of tickets : at least 1 ticket purchased if the customer purchased
|
||||
df_output.loc[:,"nb_tickets_projected"] = df_output.loc[:,nb_tickets].apply(lambda x : max(1, x /duration_ratio))
|
||||
|
||||
df_output.loc[:,"nb_tickets_projected"] = df_output.loc[:,nb_tickets] / duration_ratio
|
||||
df_output.loc[:,"total_amount_projected"] = df_output.loc[:,total_amount] / duration_ratio
|
||||
# project amount : if the customer buys a ticket, we expect the amount to be at least the average price of tickets
|
||||
# for customers purchasing exactly one ticket
|
||||
if df_output.loc[df_output[nb_tickets]==1].shape[0] > 0 :
|
||||
avg_price = df_output.loc[df_output[nb_tickets]==1][total_amount].mean()
|
||||
else :
|
||||
avg_price = df_output[total_amount].mean()
|
||||
|
||||
# compute the average ticket price for each customer
|
||||
df_output["avg_ticket_price"] = df_output[total_amount]/df_output[nb_tickets]
|
||||
|
||||
# correct negative total amounts
|
||||
df_output.loc[:,"total_amount_corrected"] = np.where(df_output[total_amount] < 0,
|
||||
avg_price * df_output[nb_tickets],
|
||||
df_output[total_amount])
|
||||
|
||||
df_output.loc[:,"total_amount_projected"] = np.where(
|
||||
# if no ticket bought in the past, we take the average price
|
||||
df_output[nb_tickets]==0, avg_price,
|
||||
# if avg prices of tickets are negative, we recompute the expected amount based on the avg price of a ticket
|
||||
# observed on the whole population
|
||||
np.where(X_test_segment["avg_ticket_price"] < 0, avg_price * df_output.loc[:,"nb_tickets_projected"],
|
||||
# else, the amount projected is the average price of tickets bought by the customer * nb tickets projected
|
||||
df_output["avg_ticket_price"] * df_output.loc[:,"nb_tickets_projected"])
|
||||
)
|
||||
|
||||
df_output.loc[:,"nb_tickets_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"nb_tickets_projected"]
|
||||
df_output.loc[:,"total_amount_expected"] = df_output.loc[:,score_adjusted] * df_output.loc[:,"total_amount_projected"]
|
||||
|
@ -198,7 +224,7 @@ def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjust
|
|||
df_output.loc[:,"pace_purchase"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)
|
||||
|
||||
return df_output
|
||||
|
||||
|
||||
|
||||
def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount, pace_purchase,
|
||||
duration_ref=17, duration_projection=12) :
|
||||
|
@ -231,6 +257,9 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
|
|||
df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
|
||||
df.groupby(segment)[total_amount].sum().values
|
||||
|
||||
df_expected_CA["share_future_revenue_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
|
||||
df[total_amount].sum()
|
||||
|
||||
df_drop_null_pace = df.dropna(subset=[pace_purchase])
|
||||
df_expected_CA["pace_purchase"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user