Added option to export to MinIO
This commit is contained in:
		
							parent
							
								
									ca30d1daa3
								
							
						
					
					
						commit
						c549752ba7
					
				|  | @ -25,6 +25,7 @@ from sklearn.naive_bayes import GaussianNB | |||
| from scipy.optimize import fsolve | ||||
| import pickle | ||||
| import warnings | ||||
| import io | ||||
| 
 | ||||
| # define type of activity  | ||||
| type_of_activity = "sport" | ||||
|  | @ -42,19 +43,20 @@ X_test_segment["score_adjusted"] = score_adjusted_train | |||
| 
 | ||||
| 
 | ||||
| # plot adjusted scores and save (to be tested) | ||||
| plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted") | ||||
| plot_hist_scores(X_test_segment, score = "score", score_adjusted = "score_adjusted", type_of_activity = type_of_activity) | ||||
| 
 | ||||
| image_buffer = io.BytesIO() | ||||
|     plt.savefig(image_buffer, format='png') | ||||
|     image_buffer.seek(0) | ||||
|     file_name = "hist_score_adjusted" | ||||
|     FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".png" | ||||
|     with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file: | ||||
|         s3_file.write(image_buffer.read()) | ||||
|     plt.close() | ||||
| plt.savefig(image_buffer, format='png') | ||||
| image_buffer.seek(0) | ||||
| file_name = "hist_score_adjusted" | ||||
| FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".png" | ||||
| with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file: | ||||
|     s3_file.write(image_buffer.read()) | ||||
| plt.close() | ||||
| 
 | ||||
| # comparison between score and adjusted score | ||||
| X_test_table_adjusted_scores = X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean().reset_index().round(2) | ||||
| X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index() | ||||
| X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]}) | ||||
| 
 | ||||
| file_name = "table_adjusted_score" | ||||
| FILE_PATH_OUT_S3 = PATH + file_name +  type_of_activity + ".csv" | ||||
|  |  | |||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							|  | @ -61,7 +61,7 @@ def find_bias(odd_ratios, y_objective, initial_guess=6) : | |||
|     return bias_estimated[0] | ||||
|      | ||||
|      | ||||
| def plot_hist_scores(df, score, score_adjusted) : | ||||
| def plot_hist_scores(df, score, score_adjusted, type_of_activity) : | ||||
|     """ | ||||
|     Plot a histogram comparing scores and adjusted scores. | ||||
| 
 | ||||
|  | @ -69,6 +69,7 @@ def plot_hist_scores(df, score, score_adjusted) : | |||
|     - df (DataFrame): DataFrame containing score data. | ||||
|     - score (str): Name of the column in df representing the original scores. | ||||
|     - score_adjusted (str): Name of the column in df representing the adjusted scores. | ||||
|     - type_of_activity (str) : type of activity of the companies considered. | ||||
| 
 | ||||
|     Returns: | ||||
|     None | ||||
|  | @ -80,9 +81,10 @@ def plot_hist_scores(df, score, score_adjusted) : | |||
|     plt.legend() | ||||
|     plt.xlabel("probability of a future purchase") | ||||
|     plt.ylabel("count") | ||||
|     plt.title("Comparison between score and adjusted score") | ||||
|     plt.title(f"Comparison between score and adjusted score for {type_of_activity} companies") | ||||
|     plt.show() | ||||
| 
 | ||||
| 
 | ||||
| def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :  | ||||
|     """ | ||||
|     Project ticket counts and total amount for a given duration and adjust based on a score. | ||||
|  | @ -140,7 +142,7 @@ def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, | |||
|     # compute share of CA recovered | ||||
|     duration_ratio=duration_ref/duration_projection | ||||
|      | ||||
|     df_expected_CA["perct_revenue_recovered"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \ | ||||
|     df_expected_CA["revenue_recovered_perct"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \ | ||||
|     df.groupby(segment)[total_amount].sum().values | ||||
|      | ||||
|     return df_expected_CA | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user