Add option to export results to MinIO

2024-03-24 09:42:44 +00:00 · 2024-03-24 09:42:44 +00:00 · c549752ba7
commit c549752ba7
parent ca30d1daa3
3 changed files with 302 additions and 352 deletions
--- a/0_7_CA_segment.py
+++ b/0_7_CA_segment.py
@ -25,6 +25,7 @@ from sklearn.naive_bayes import GaussianNB
 from scipy.optimize import fsolve
 import pickle
 import warnings
+import io

 # define type of activity 
 type_of_activity = "sport"
@ -42,7 +43,7 @@ X_test_segment["score_adjusted"] = score_adjusted_train


 # plot adjusted scores and save (to be tested)
-plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted")
+plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted", type_of_activity = type_of_activity)

 image_buffer = io.BytesIO()
 plt.savefig(image_buffer, format='png')
@ -54,7 +55,8 @@ image_buffer = io.BytesIO()
 plt.close()

 # comparison between score and adjusted score
-X_test_table_adjusted_scores = X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean().reset_index().round(2)
+X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index()
+X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]})

 file_name = "table_adjusted_score"
 FILE_PATH_OUT_S3 = PATH + file_name +  type_of_activity + ".csv"
--- a/Sport/Modelization/CA_segment_sport.ipynb
+++ b/Sport/Modelization/CA_segment_sport.ipynb
--- a/utils_CA_segment.py
+++ b/utils_CA_segment.py
@ -61,7 +61,7 @@ def find_bias(odd_ratios, y_objective, initial_guess=6) :
    return bias_estimated[0]
    
    
-def plot_hist_scores(df, score, score_adjusted) :
+def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
    """
    Plot a histogram comparing scores and adjusted scores.

@ -69,6 +69,7 @@ def plot_hist_scores(df, score, score_adjusted) :
    - df (DataFrame): DataFrame containing score data.
    - score (str): Name of the column in df representing the original scores.
    - score_adjusted (str): Name of the column in df representing the adjusted scores.
+    - type_of_activity (str): Type of activity of the companies considered; displayed in the plot title.

    Returns:
    None
@ -80,9 +81,10 @@ def plot_hist_scores(df, score, score_adjusted) :
    plt.legend()
    plt.xlabel("probability of a future purchase")
    plt.ylabel("count")
-    plt.title("Comparison between score and adjusted score")
+    plt.title(f"Comparison between score and adjusted score for {type_of_activity} companies")
    plt.show()

+
 def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) : 
    """
    Project ticket counts and total amount for a given duration and adjust based on a score.
@ -140,7 +142,7 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
    # compute share of CA recovered
    duration_ratio=duration_ref/duration_projection
    
-    df_expected_CA["perct_revenue_recovered"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
+    df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
    df.groupby(segment)[total_amount].sum().values
    
    return df_expected_CA