BDC-team-1/utils_CA_segment.py

def odd_ratio(score):
    """
    Convert a score into odds, i.e. score / (1 - score).

    Args:
    - score (Union[float, int]): Score value. A score of exactly 1 makes the
      denominator zero, which is why adjust_score_1 exists in this module.

    Returns:
    float: Odds associated with the score.
    """

    complement = 1 - score
    return score / complement


def adjust_score_1(score):
    """
    Replace every score equal to 1 with the largest score different from 1.

    A score of exactly 1 cannot be turned into an odd ratio (division by
    zero in score / (1 - score)), so those entries are capped at the best
    remaining value before odd ratios are computed.

    Args:
    - score (List[Union[float, int]]): One-dimensional collection of scores.

    Returns:
    np.ndarray: Scores in their original order, with ones replaced.

    Raises:
    ValueError: If there is no score different from 1 to use as a replacement
    (empty input, or every score equal to 1).
    """

    scores = np.asarray(score)
    is_one = scores == 1

    # Candidates for the replacement value: everything that is not exactly 1.
    candidates = scores[~is_one]
    if candidates.size == 0:
        raise ValueError(
            "adjust_score_1 needs at least one score different from 1"
        )

    second_best_score = candidates.max()
    return np.where(is_one, second_best_score, scores)


def adjusted_score(odd_ratio, bias):
    """
    Turn an odd ratio back into a score, rescaled by a bias term.

    The result is odd_ratio / (bias + odd_ratio). With bias equal to 1 this
    is the inverse of the score -> odd ratio transformation.

    Args:
    - odd_ratio (Union[float, int]): Odd ratio value.
    - bias (Union[float, int]): Bias added to the odd ratio in the denominator.

    Returns:
    float: Adjusted score value.
    """

    denominator = bias + odd_ratio
    return odd_ratio / denominator


def find_bias(odd_ratios, y_objective, initial_guess=6):
    """
    Find the bias for which the adjusted scores sum to the observed objective.

    Solves, with scipy's fsolve, the equation
        sum(odd_ratio / (bias + odd_ratio)) = y_objective
    for bias. Each term is the same formula as adjusted_score.

    Args:
    - odd_ratios (List[float]): Odd ratios, one per observation.
    - y_objective (Union[float, int]): Target value for the sum of adjusted scores.
    - initial_guess (Union[float, int], optional): Starting point handed to the
      solver. Default is 6. The equation can have several roots, so the
      starting point determines which one is returned.

    Returns:
    float: Estimated bias value.
    """

    # Materialise once: the solver evaluates the residual many times.
    ratios = list(odd_ratios)

    def residual(bias):
        # Gap between the sum of adjusted scores and the objective.
        return sum(ratio / (bias + ratio) for ratio in ratios) - y_objective

    # The caller-supplied starting point is used (it was previously ignored
    # in favour of a hard-coded 6).
    bias_estimated = fsolve(residual, x0=initial_guess)

    return bias_estimated[0]
    
    
def plot_hist_scores(df, score, score_adjusted):
    """
    Draw the histograms of the raw and adjusted scores on a single figure.

    Args:
    - df (DataFrame): DataFrame containing score data.
    - score (str): Name of the column in df holding the original scores.
    - score_adjusted (str): Name of the column in df holding the adjusted scores.

    Returns:
    None
    """

    plt.figure()

    # Same semi-transparent styling for both series so the overlap stays visible.
    series_to_plot = ((score, "score"), (score_adjusted, "adjusted score"))
    for column_name, legend_label in series_to_plot:
        plt.hist(df[column_name], label=legend_label, alpha=0.6)

    plt.legend()
    plt.xlabel("probability of a future purchase")
    plt.ylabel("count")
    plt.title("Comparison between score and adjusted score")
    plt.show()

def project_tickets_CA(df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection):
    """
    Project ticket counts and total amount onto another duration, then weight them by a score.

    The observed values are divided by duration_ref / duration_projection to
    obtain the "projected" columns; the "expected" columns are the projected
    values multiplied by the adjusted score.

    Note: the four new columns are added to df itself (the input is modified
    in place) and that same DataFrame is returned.

    Args:
    - df (DataFrame): DataFrame containing ticket data.
    - nb_tickets (str): Name of the column in df representing the number of tickets.
    - total_amount (str): Name of the column in df representing the total amount.
    - score_adjusted (str): Name of the column in df representing the adjusted score.
    - duration_ref (int or float): Duration over which the data in df was observed.
    - duration_projection (int or float): Duration for which the projection is made.
      Must be non-zero.

    Returns:
    DataFrame: df with the columns "nb_tickets_projected", "total_amount_projected",
    "nb_tickets_expected" and "total_amount_expected" added.
    """

    # This assignment used to sit inside the docstring, so it never executed
    # and the duration arguments were ignored.
    duration_ratio = duration_ref / duration_projection

    df_output = df

    df_output["nb_tickets_projected"] = df_output[nb_tickets] / duration_ratio
    df_output["total_amount_projected"] = df_output[total_amount] / duration_ratio

    df_output["nb_tickets_expected"] = df_output[score_adjusted] * df_output["nb_tickets_projected"]
    df_output["total_amount_expected"] = df_output[score_adjusted] * df_output["total_amount_projected"]

    return df_output
    

def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount,
                        duration_ref=1.5, duration_projection=1):
    """
    Summarise expected tickets and expected amount per segment.

    For each segment the summary holds the number of rows, that number as a
    percentage of all rows, the summed expected tickets and expected amount,
    and the share of the observed amount that the expected amount represents
    once rescaled by duration_ref / duration_projection.

    Args:
    - df (DataFrame): DataFrame containing customer data.
    - segment (str): Name of the column in df representing customer segments.
    - nb_tickets_expected (str): Name of the column in df representing the expected number of tickets.
    - total_amount_expected (str): Name of the column in df representing the expected total amount.
    - total_amount (str): Name of the column in df representing the observed total amount.
    - duration_ref (int or float, optional): Duration covered by the observed amount. Default is 1.5.
    - duration_projection (int or float, optional): Duration covered by the expected amount.
      Default is 1. Must be non-zero.

    Returns:
    DataFrame: One row per segment with the columns segment, "size", "size_perct",
    nb_tickets_expected, total_amount_expected and "perct_revenue_recovered".
    """

    # Group once; every aggregate below uses the same grouping, so the
    # per-segment results come out in the same order.
    by_segment = df.groupby(segment)

    # compute nb tickets estimated and total amount expected
    df_expected_CA = by_segment[[nb_tickets_expected, total_amount_expected]].sum().reset_index()

    # number of customers by segment
    df_expected_CA.insert(1, "size", by_segment.size().values)

    # size in percent of all customers
    df_expected_CA.insert(2, "size_perct", 100 * df_expected_CA["size"] / df_expected_CA["size"].sum())

    # compute share of CA recovered: the expected amount is scaled back to the
    # reference duration before being compared with the observed amount
    duration_ratio = duration_ref / duration_projection
    observed_amount = by_segment[total_amount].sum().values

    df_expected_CA["perct_revenue_recovered"] = (
        100 * duration_ratio * df_expected_CA[total_amount_expected] / observed_amount
    )

    return df_expected_CA
added utils for CA estimation 2024-03-23 10:18:43 +01:00			`def odd_ratio(score) :`
			`"""`
			`Calculate the odd ratio from a score.`

			`Args:`
			`- score (Union[float, int]): Score value.`

			`Returns:`
			`float: Odd ratio value.`
			`"""`

			`return score / (1 - score)`


			`def adjust_score_1(score) :`
			`"""`
			`Adjust scores by replacing ones with the second highest value.`
			`Allows to compute odd ratios then.`

			`Args:`
			`- score (List[Union[float, int]]): List of score values.`

			`Returns:`
			`np.ndarray: Adjusted score values.`
			`"""`

			`second_best_score = np.array([element for element in score if element !=1]).max()`
			`new_score = np.array([element if element!=1 else second_best_score for element in score])`
			`return new_score`


			`def adjusted_score(odd_ratio, bias) :`
			`"""`
			`Adjust the score based on the odd ratio and bias.`

			`Args:`
			`- odd_ratio (Union[float, int]): Odd ratio value.`
			`- bias (Union[float, int]): Bias value.`

			`Returns:`
			`float: Adjusted score value.`
			`"""`

			`adjusted_score = odd_ratio/(bias+odd_ratio)`
			`return adjusted_score`


			`def find_bias(odd_ratios, y_objective, initial_guess=6) :`
			`"""`
			`Find the bias needed to adjust scores according to the purchases observed`

			`Args:`
			`- odd_ratios (List[float]): List of odd ratios.`
			`- y_objective (Union[float, int]): Objective value to achieve.`
			`- initial_guess (Union[float, int], optional): Initial guess for the bias. Default is 6.`

			`Returns:`
			`float: Estimated bias value.`
			`"""`

			`bias_estimated = fsolve(lambda bias : sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective, x0=6)`

			`return bias_estimated[0]`


			`def plot_hist_scores(df, score, score_adjusted) :`
			`"""`
			`Plot a histogram comparing scores and adjusted scores.`

			`Args:`
			`- df (DataFrame): DataFrame containing score data.`
			`- score (str): Name of the column in df representing the original scores.`
			`- score_adjusted (str): Name of the column in df representing the adjusted scores.`

			`Returns:`
			`None`
			`"""`

			`plt.figure()`
			`plt.hist(df[score], label = "score", alpha=0.6)`
			`plt.hist(df[score_adjusted], label="adjusted score", alpha=0.6)`
			`plt.legend()`
			`plt.xlabel("probability of a future purchase")`
			`plt.ylabel("count")`
			`plt.title("Comparison between score and adjusted score")`
			`plt.show()`

			`def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :`
			`"""`
			`Project ticket counts and total amount for a given duration and adjust based on a score.`

			`Args:`
			`- df (DataFrame): DataFrame containing ticket data.`
			`- nb_tickets (str): Name of the column in df representing the number of tickets.`
			`- total_amount (str): Name of the column in df representing the total amount.`
			`- score_adjusted (str): Name of the column in df representing the adjusted score.`
			`- duration_ref (int or float): Reference duration for the project.`
			`- duration_projection (int or float): Duration for which the projection is made.`

			`Returns:`
			`DataFrame: DataFrame with projected ticket counts and total amount adjusted based on the score.`
			`duration_ratio = duration_ref/duration_projection`
			`"""`

			`df_output = df`

			`df_output["nb_tickets_projected"] = df_output[nb_tickets] / duration_ratio`
			`df_output["total_amount_projected"] = df_output[total_amount] / duration_ratio`

			`df_output["nb_tickets_expected"] = df_output[score_adjusted] * df_output["nb_tickets_projected"]`
			`df_output["total_amount_expected"] = df_output[score_adjusted] * df_output["total_amount_projected"]`

			`return df_output`


			`def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount) :`
			`"""`
			`Generate a summary of expected customer acquisition based on segments.`

			`Args:`
			`- df (DataFrame): DataFrame containing customer data.`
			`- segment (str): Name of the column in df representing customer segments.`
			`- nb_tickets_expected (str): Name of the column in df representing the expected number of tickets.`
			`- total_amount_expected (str): Name of the column in df representing the expected total amount.`
			`- total_amount (str): Name of the column in df representing the total amount.`

			`Returns:`
			`DataFrame: Summary DataFrame containing expected customer acquisition metrics.`
			`"""`

			`# compute nb tickets estimated and total amount expected`
			`df_expected_CA = df.groupby(segment)[[nb_tickets_expected, total_amount_expected]].sum().reset_index()`

			`# number of customers by segment`
			`df_expected_CA.insert(1, "size", df.groupby(segment).size().values)`

			`# size in percent of all customers`
			`df_expected_CA.insert(2, "size_perct", 100 * df_expected_CA["size"]/df_expected_CA["size"].sum())`

			`# compute share of CA recovered`
			`duration_ref=1.5`
			`duration_projection=1`
			`duration_ratio=duration_ref/duration_projection`

			`df_expected_CA["perct_revenue_recovered"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \`
			`df.groupby(segment)[total_amount].sum().values`

			`return df_expected_CA`