add probability
This commit is contained in:
parent
3d03965084
commit
5cd1bcc222
|
@ -11,8 +11,30 @@ import warnings
|
||||||
# Pull the helper functions (load_model, load_test_file, ...) into this
# module's namespace. The file is opened in a `with` block so the handle is
# closed as soon as its contents have been read.
# NOTE(review): exec() runs the file's code verbatim; this is only safe while
# utils_segmentation.py is trusted, project-local code.
with open('utils_segmentation.py') as utils_file:
    exec(utils_file.read())

warnings.filterwarnings('ignore')

# Create filesystem object
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# Choose the type of companies for which you want to run the pipeline
type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')

# Load test set
dataset_test = load_test_file(type_of_activity)

# Load model
model = load_model(type_of_activity, 'LogisticRegression_Benchmark')

# Processing: select the feature columns and the target column
X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
                       'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner',
                       'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']]

y_test = dataset_test[['y_has_purchased']]

# Prediction: keep the second column of predict_proba
# (presumably the probability of y_has_purchased == 1 -- TODO confirm class order)
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Add probability to dataset_test
dataset_test['Probability_to_buy'] = y_pred_prob
print('probability added to dataset_test')
print(dataset_test.head())
|
|
@ -84,7 +84,7 @@ def compute_recall_companies(dataset_test, y_pred, type_of_activity, model):
|
||||||
|
|
||||||
def features_target_split(dataset_train, dataset_test):
|
def features_target_split(dataset_train, dataset_test):
|
||||||
features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
|
features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
|
||||||
'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',
|
'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner',
|
||||||
'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']
|
'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']
|
||||||
X_train = dataset_train[features_l]
|
X_train = dataset_train[features_l]
|
||||||
y_train = dataset_train[['y_has_purchased']]
|
y_train = dataset_train[['y_has_purchased']]
|
||||||
|
|
|
@ -18,3 +18,10 @@ def load_model(type_of_activity, model):
|
||||||
model = pickle.loads(model_bytes)
|
model = pickle.loads(model_bytes)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
def load_test_file(type_of_activity):
    """Read the test set CSV for one activity type.

    The file is fetched from
    ``projet-bdc2324-team1/Generalization/<type_of_activity>/Test_set.csv``
    through the module-level ``fs`` filesystem object, which must be defined
    before this function is called.

    Args:
        type_of_activity: Name of the activity folder, interpolated into the
            file path.

    Returns:
        The result of ``pd.read_csv`` on that file (comma-separated).
    """
    test_set_key = f"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv"
    with fs.open(test_set_key, mode="rb") as csv_stream:
        return pd.read_csv(csv_stream, sep=",")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user