Added option to export to MinIO
This commit is contained in:
parent
ca30d1daa3
commit
c549752ba7
|
@ -25,6 +25,7 @@ from sklearn.naive_bayes import GaussianNB
|
|||
from scipy.optimize import fsolve
|
||||
import pickle
|
||||
import warnings
|
||||
import io
|
||||
|
||||
# define type of activity
|
||||
type_of_activity = "sport"
|
||||
|
@ -42,7 +43,7 @@ X_test_segment["score_adjusted"] = score_adjusted_train
|
|||
|
||||
|
||||
# plot adjusted scores and save (to be tested)
|
||||
plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted")
|
||||
plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted", type_of_activity = type_of_activity)
|
||||
|
||||
image_buffer = io.BytesIO()
|
||||
plt.savefig(image_buffer, format='png')
|
||||
|
@ -54,7 +55,8 @@ image_buffer = io.BytesIO()
|
|||
plt.close()
|
||||
|
||||
# comparison between score and adjusted score
|
||||
X_test_table_adjusted_scores = X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean().reset_index().round(2)
|
||||
X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index()
|
||||
X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]})
|
||||
|
||||
file_name = "table_adjusted_score"
|
||||
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".csv"
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -61,7 +61,7 @@ def find_bias(odd_ratios, y_objective, initial_guess=6) :
|
|||
return bias_estimated[0]
|
||||
|
||||
|
||||
def plot_hist_scores(df, score, score_adjusted) :
|
||||
def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
|
||||
"""
|
||||
Plot a histogram comparing scores and adjusted scores.
|
||||
|
||||
|
@ -69,6 +69,7 @@ def plot_hist_scores(df, score, score_adjusted) :
|
|||
- df (DataFrame): DataFrame containing score data.
|
||||
- score (str): Name of the column in df representing the original scores.
|
||||
- score_adjusted (str): Name of the column in df representing the adjusted scores.
|
||||
- type_of_activity (str): Type of activity of the companies considered.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
@ -80,9 +81,10 @@ def plot_hist_scores(df, score, score_adjusted) :
|
|||
plt.legend()
|
||||
plt.xlabel("probability of a future purchase")
|
||||
plt.ylabel("count")
|
||||
plt.title("Comparison between score and adjusted score")
|
||||
plt.title(f"Comparison between score and adjusted score for {type_of_activity} companies")
|
||||
plt.show()
|
||||
|
||||
|
||||
def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
|
||||
"""
|
||||
Project ticket counts and total amount for a given duration and adjust based on a score.
|
||||
|
@ -140,7 +142,7 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
|
|||
# compute share of CA recovered
|
||||
duration_ratio=duration_ref/duration_projection
|
||||
|
||||
df_expected_CA["perct_revenue_recovered"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
|
||||
df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
|
||||
df.groupby(segment)[total_amount].sum().values
|
||||
|
||||
return df_expected_CA
|
||||
|
|
Loading…
Reference in New Issue
Block a user