Added option to export to MinIO

This commit is contained in:
Thomas PIQUE 2024-03-24 09:42:44 +00:00
parent ca30d1daa3
commit c549752ba7
3 changed files with 302 additions and 352 deletions

View File

@ -25,6 +25,7 @@ from sklearn.naive_bayes import GaussianNB
from scipy.optimize import fsolve
import pickle
import warnings
import io
# define type of activity
type_of_activity = "sport"
@ -42,19 +43,20 @@ X_test_segment["score_adjusted"] = score_adjusted_train
# plot adjusted scores and save (to be tested)
plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted")
plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted", type_of_activity = type_of_activity)
image_buffer = io.BytesIO()
plt.savefig(image_buffer, format='png')
image_buffer.seek(0)
file_name = "hist_score_adjusted"
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".png"
with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:
s3_file.write(image_buffer.read())
plt.close()
plt.savefig(image_buffer, format='png')
image_buffer.seek(0)
file_name = "hist_score_adjusted"
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".png"
with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:
s3_file.write(image_buffer.read())
plt.close()
# comparison between score and adjusted score
X_test_table_adjusted_scores = X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean().reset_index().round(2)
X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index()
X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]})
file_name = "table_adjusted_score"
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".csv"

File diff suppressed because one or more lines are too long

View File

@ -61,7 +61,7 @@ def find_bias(odd_ratios, y_objective, initial_guess=6) :
return bias_estimated[0]
def plot_hist_scores(df, score, score_adjusted) :
def plot_hist_scores(df, score, score_adjusted, type_of_activity) :
"""
Plot a histogram comparing scores and adjusted scores.
@ -69,6 +69,7 @@ def plot_hist_scores(df, score, score_adjusted) :
- df (DataFrame): DataFrame containing score data.
- score (str): Name of the column in df representing the original scores.
- score_adjusted (str): Name of the column in df representing the adjusted scores.
- type_of_activity (str): Type of activity of the companies considered.
Returns:
None
@ -80,9 +81,10 @@ def plot_hist_scores(df, score, score_adjusted) :
plt.legend()
plt.xlabel("probability of a future purchase")
plt.ylabel("count")
plt.title("Comparison between score and adjusted score")
plt.title(f"Comparison between score and adjusted score for {type_of_activity} companies")
plt.show()
def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :
"""
Project ticket counts and total amount for a given duration and adjust based on a score.
@ -140,7 +142,7 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected,
# compute share of CA recovered
duration_ratio=duration_ref/duration_projection
df_expected_CA["perct_revenue_recovered"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \
df.groupby(segment)[total_amount].sum().values
return df_expected_CA