2024-03-23 10:18:43 +01:00
|
|
|
def odd_ratio(score):
    """
    Convert a score into its odds, i.e. ``score / (1 - score)``.

    Args:
    - score (Union[float, int]): Score value. A score equal to 1 makes the
      denominator zero.

    Returns:
    float: Odd ratio value.
    """
    complement = 1 - score
    return score / complement
|
|
|
|
|
|
|
|
|
|
|
|
def adjust_score_1(score):
    """
    Replace every score equal to 1 by the largest score that differs from 1.

    This makes it possible to compute odd ratios afterwards.

    Args:
    - score (List[Union[float, int]]): List of score values.

    Returns:
    np.ndarray: Adjusted score values, in the same order as the input.

    Raises:
    ValueError: If no element differs from 1 (the maximum of an empty array
      is undefined).
    """
    # Largest value among the entries that are not exactly 1.
    largest_non_one = np.array([value for value in score if value != 1]).max()

    # Swap each 1 for that value; every other entry is kept unchanged.
    replaced = [largest_non_one if value == 1 else value for value in score]
    return np.array(replaced)
|
|
|
|
|
|
|
|
|
|
|
|
def adjusted_score(odd_ratio, bias):
    """
    Rescale an odd ratio with a bias term: ``odd_ratio / (bias + odd_ratio)``.

    Args:
    - odd_ratio (Union[float, int]): Odd ratio value.
    - bias (Union[float, int]): Bias value.

    Returns:
    float: Adjusted score value.
    """
    denominator = bias + odd_ratio
    return odd_ratio / denominator
|
|
|
|
|
|
|
|
|
|
|
|
def find_bias(odd_ratios, y_objective, initial_guess=6):
    """
    Find the bias needed to adjust scores according to the purchases observed.

    Uses ``fsolve`` to look for the bias ``b`` such that
    ``sum(adjusted_score(o, b) for o in odd_ratios) == y_objective``.

    Args:
    - odd_ratios (List[float]): List of odd ratios.
    - y_objective (Union[float, int]): Objective value to achieve.
    - initial_guess (Union[float, int], optional): Starting point handed to
      the solver. Default is 6.

    Returns:
    float: Estimated bias value (first component of the solver's solution).
    """
    # Materialise once: the solver evaluates the objective many times, and a
    # one-shot iterable would be exhausted after the first evaluation.
    ratios = list(odd_ratios)

    def gap_to_objective(bias):
        # Difference between the total adjusted score and the target value.
        return sum(adjusted_score(ratio, bias) for ratio in ratios) - y_objective

    # Start from the caller-supplied guess instead of a hard-coded constant.
    bias_estimated = fsolve(gap_to_objective, x0=initial_guess)

    return bias_estimated[0]
|
|
|
|
|
|
|
|
|
2024-03-24 10:42:44 +01:00
|
|
|
def plot_hist_scores(df, score, score_adjusted, type_of_activity):
    """
    Draw overlaid histograms of the original and adjusted scores on a new figure.

    Args:
    - df (DataFrame): DataFrame containing score data.
    - score (str): Name of the column in df representing the original scores.
    - score_adjusted (str): Name of the column in df representing the adjusted scores.
    - type_of_activity (str): Type of activity of the companies considered;
      only used in the figure title.

    Returns:
    None
    """
    plt.figure()

    # Both histograms are semi-transparent so that each stays visible where they overlap.
    for column, legend_label in ((score, "score"), (score_adjusted, "adjusted score")):
        plt.hist(df[column], label=legend_label, alpha=0.6)

    plt.legend()
    plt.xlabel("probability of a future purchase")
    plt.ylabel("count")
    plt.title(f"Comparison between score and adjusted score for {type_of_activity} companies")
    # plt.show() is not called here; presumably the caller displays or saves
    # the figure -- confirm against the call sites.
|
2024-03-23 10:18:43 +01:00
|
|
|
|
2024-03-24 10:42:44 +01:00
|
|
|
|
2024-03-23 10:18:43 +01:00
|
|
|
def project_tickets_CA(df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection):
    """
    Project ticket counts and total amount over a given duration, then weight them by a score.

    The projected values are the observed ones divided by
    ``duration_ref / duration_projection``; the expected values are the
    projected ones multiplied by the adjusted score.

    Note: the four new columns are written directly into ``df`` -- the input
    DataFrame is modified in place and the same object is returned.

    Args:
    - df (DataFrame): DataFrame containing ticket data.
    - nb_tickets (str): Name of the column in df representing the number of tickets.
    - total_amount (str): Name of the column in df representing the total amount.
    - score_adjusted (str): Name of the column in df representing the adjusted score.
    - duration_ref (int or float): Reference duration for the project.
    - duration_projection (int or float): Duration for which the projection is made.

    Returns:
    DataFrame: ``df`` itself, with the columns "nb_tickets_projected",
    "total_amount_projected", "nb_tickets_expected" and "total_amount_expected" added.
    """
    ratio = duration_ref / duration_projection

    # No copy is made: `result` is the caller's DataFrame.
    result = df

    # (new column, source column) pairs, rescaled to the projection duration.
    projected_columns = (
        ("nb_tickets_projected", nb_tickets),
        ("total_amount_projected", total_amount),
    )
    for target, source in projected_columns:
        result.loc[:, target] = result.loc[:, source] / ratio

    # (new column, projected column) pairs, weighted by the adjusted score.
    expected_columns = (
        ("nb_tickets_expected", "nb_tickets_projected"),
        ("total_amount_expected", "total_amount_projected"),
    )
    for target, projected in expected_columns:
        result.loc[:, target] = result.loc[:, score_adjusted] * result.loc[:, projected]

    return result
|
|
|
|
|
|
|
|
|
|
|
|
def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount,
                        duration_ref=None, duration_projection=None):
    """
    Summarise, per segment, the expected number of tickets and expected total amount.

    The result has one row per segment with, in order: the segment, its
    number of rows ("size"), its share of all rows in percent ("size_perct"),
    the summed expected tickets, the summed expected amount, and
    "revenue_recovered_perct", computed as
    ``100 * (duration_ref / duration_projection) * sum(expected amount) / sum(total amount)``.

    Args:
    - df (DataFrame): DataFrame containing customer data.
    - segment (str): Name of the column in df representing customer segments.
    - nb_tickets_expected (str): Name of the column in df representing the expected number of tickets.
    - total_amount_expected (str): Name of the column in df representing the expected total amount.
    - total_amount (str): Name of the column in df representing the total amount.
    - duration_ref (int or float, optional): Reference duration. When omitted,
      the module-level variable ``duration_ref`` is used.
    - duration_projection (int or float, optional): Projection duration. When
      omitted, the module-level variable ``duration_projection`` is used.

    Returns:
    DataFrame: Summary DataFrame with one row per segment.

    Raises:
    NameError: If a duration is neither passed nor defined at module level.
    """
    # The durations used to be read implicitly from module globals. They are
    # now explicit parameters; the global lookup is kept only as a fallback so
    # that existing call sites keep working.
    if duration_ref is None or duration_projection is None:
        module_globals = globals()
        try:
            if duration_ref is None:
                duration_ref = module_globals["duration_ref"]
            if duration_projection is None:
                duration_projection = module_globals["duration_projection"]
        except KeyError as missing:
            raise NameError(
                f"name {missing.args[0]!r} is not defined; pass duration_ref and "
                "duration_projection explicitly"
            ) from None
    duration_ratio = duration_ref / duration_projection

    # Group once and reuse: every aggregate below then shares the same
    # segment order, which the positional `.values` assignments rely on.
    grouped = df.groupby(segment)

    # compute nb tickets estimated and total amount expected
    df_expected_CA = grouped[[nb_tickets_expected, total_amount_expected]].sum().reset_index()

    # number of customers by segment
    df_expected_CA.insert(1, "size", grouped.size().values)

    # size in percent of all customers
    df_expected_CA.insert(2, "size_perct", 100 * df_expected_CA["size"] / df_expected_CA["size"].sum())

    # compute share of CA recovered
    df_expected_CA["revenue_recovered_perct"] = (
        100 * duration_ratio * df_expected_CA[total_amount_expected]
        / grouped[total_amount].sum().values
    )

    return df_expected_CA
|