def odd_ratio(score):
    """
    Calculate the odd ratio from a score.

    Args:
    - score (Union[float, int]): Score value. Must differ from 1, otherwise
      the denominator ``1 - score`` is zero.

    Returns:
    float: Odd ratio value, ``score / (1 - score)``.
    """
    return score / (1 - score)


def adjust_score_1(score):
    """
    Adjust scores by replacing every value equal to 1 with the highest value
    that is not equal to 1. This makes it possible to compute odd ratios
    afterwards (a score of exactly 1 would give a zero denominator).

    Args:
    - score (List[Union[float, int]]): List of score values.

    Returns:
    np.ndarray: Adjusted score values, same length and order as the input.

    Raises:
    ValueError: If no value different from 1 exists (including empty input),
    because there is then nothing to replace the ones with.
    """
    scores = np.asarray(score)
    is_one = scores == 1
    if is_one.all():
        # Also covers the empty case: .all() of an empty mask is True.
        raise ValueError("score must contain at least one value different from 1")
    replacement = scores[~is_one].max()
    return np.where(is_one, replacement, scores)


def adjusted_score(odd_ratio, bias):
    """
    Adjust the score based on the odd ratio and bias.

    Args:
    - odd_ratio (Union[float, int]): Odd ratio value.
    - bias (Union[float, int]): Bias value.

    Returns:
    float: Adjusted score value, ``odd_ratio / (bias + odd_ratio)``.
    """
    return odd_ratio / (bias + odd_ratio)


def find_bias(odd_ratios, y_objective, initial_guess=6):
    """
    Find the bias needed to adjust scores according to the purchases observed.

    Solves, with ``fsolve``, for the bias such that the sum of the adjusted
    scores over all odd ratios equals ``y_objective``.

    Args:
    - odd_ratios (List[float]): List of odd ratios.
    - y_objective (Union[float, int]): Objective value to achieve.
    - initial_guess (Union[float, int], optional): Starting point handed to
      the solver. Default is 6.

    Returns:
    float: Estimated bias value.
    """
    # Materialise once so the solver does not rebuild the list at every step.
    ratios = np.asarray(list(odd_ratios), dtype=float)

    def residual(bias):
        return adjusted_score(ratios, bias).sum() - y_objective

    # The starting point must come from the caller, not a hard-coded constant.
    bias_estimated = fsolve(residual, x0=initial_guess)
    return bias_estimated[0]


def plot_hist_scores(df, score, score_adjusted):
    """
    Plot a histogram comparing scores and adjusted scores.

    Args:
    - df (DataFrame): DataFrame containing score data.
    - score (str): Name of the column in df representing the original scores.
    - score_adjusted (str): Name of the column in df representing the
      adjusted scores.

    Returns:
    None
    """
    plt.figure()
    plt.hist(df[score], label="score", alpha=0.6)
    plt.hist(df[score_adjusted], label="adjusted score", alpha=0.6)
    plt.legend()
    plt.xlabel("probability of a future purchase")
    plt.ylabel("count")
    plt.title("Comparison between score and adjusted score")
    plt.show()


def project_tickets_CA(df, nb_tickets, total_amount, score_adjusted,
                       duration_ref, duration_projection):
    """
    Project ticket counts and total amount for a given duration and adjust
    them based on a score.

    The reference-period figures are divided by
    ``duration_ref / duration_projection`` to obtain projected figures, which
    are then multiplied by the adjusted score to obtain expected figures.

    Note: the columns are added to ``df`` itself (no copy is made); the same
    DataFrame object is returned.

    Args:
    - df (DataFrame): DataFrame containing ticket data.
    - nb_tickets (str): Name of the column in df representing the number of
      tickets.
    - total_amount (str): Name of the column in df representing the total
      amount.
    - score_adjusted (str): Name of the column in df representing the
      adjusted score.
    - duration_ref (int or float): Reference duration.
    - duration_projection (int or float): Duration for which the projection
      is made.

    Returns:
    DataFrame: ``df`` with four extra columns: "nb_tickets_projected",
    "total_amount_projected", "nb_tickets_expected" and
    "total_amount_expected".
    """
    # This assignment used to sit inside the docstring and therefore never ran.
    duration_ratio = duration_ref / duration_projection

    df_output = df
    df_output["nb_tickets_projected"] = df_output[nb_tickets] / duration_ratio
    df_output["total_amount_projected"] = df_output[total_amount] / duration_ratio
    df_output["nb_tickets_expected"] = (
        df_output[score_adjusted] * df_output["nb_tickets_projected"]
    )
    df_output["total_amount_expected"] = (
        df_output[score_adjusted] * df_output["total_amount_projected"]
    )
    return df_output


def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
                        total_amount, duration_ref=1.5, duration_projection=1):
    """
    Generate a per-segment summary of expected tickets and expected amount.
    ("CA" presumably stands for revenue / "chiffre d'affaires" - to confirm.)

    Args:
    - df (DataFrame): DataFrame containing customer data.
    - segment (str): Name of the column in df representing customer segments.
    - nb_tickets_expected (str): Name of the column in df representing the
      expected number of tickets.
    - total_amount_expected (str): Name of the column in df representing the
      expected total amount.
    - total_amount (str): Name of the column in df representing the total
      amount.
    - duration_ref (int or float, optional): Reference duration used to scale
      the recovered share. Default is 1.5.
    - duration_projection (int or float, optional): Projection duration used
      to scale the recovered share. Default is 1.

    Returns:
    DataFrame: One row per segment with the columns: segment, "size",
    "size_perct", the two summed expected columns and
    "perct_revenue_recovered".
    """
    grouped = df.groupby(segment)

    # compute nb tickets estimated and total amount expected
    df_expected_CA = (
        grouped[[nb_tickets_expected, total_amount_expected]].sum().reset_index()
    )

    # number of customers by segment
    df_expected_CA.insert(1, "size", grouped.size().values)

    # size in percent of all customers
    df_expected_CA.insert(
        2, "size_perct",
        100 * df_expected_CA["size"] / df_expected_CA["size"].sum(),
    )

    # compute share of CA recovered
    duration_ratio = duration_ref / duration_projection
    observed_amount = grouped[total_amount].sum().values
    df_expected_CA["perct_revenue_recovered"] = (
        100 * duration_ratio * df_expected_CA[total_amount_expected]
        / observed_amount
    )

    return df_expected_CA