"""Score a test dataset with a previously saved model.

The script asks the user for a type of activity, loads the matching test
set and the model registered under the name 'LogisticRegression_Benchmark',
computes a probability for every row of the test set and stores it in a
new 'Probability_to_buy' column.

`load_test_file` and `load_model` are not defined here: they are expected
to come from 'utils_segmentation.py', which is executed into this module's
namespace below.
"""

import io
import os
import pickle
import re
import warnings

import numpy as np
import pandas as pd
import s3fs

# The helper file is executed in this module's global namespace, so the
# names it defines become globals here and it can see the imports above
# (which is why apparently unused imports are kept).
# The context manager closes the file handle, which the bare
# `exec(open(...).read())` form never did.
# NOTE(review): a regular `import` of the helpers would be safer than
# `exec`; kept as-is because the helpers may rely on this shared namespace.
with open('utils_segmentation.py') as utils_file:
    exec(utils_file.read())

warnings.filterwarnings('ignore')

# Create filesystem object
# NOTE(review): the helpers presumably read the global `fs` -- confirm
# before renaming or moving it.
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# Choose the type of companies for which the pipeline is run.
type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')

# Load the test set.
dataset_test = load_test_file(type_of_activity)

# Load the model.
model = load_model(type_of_activity, 'LogisticRegression_Benchmark')

# Columns passed to the model, in this order.
# NOTE(review): the original line breaks were lost; this assumes that only
# 'is_partner' was commented out of the list -- confirm against the columns
# the model was trained on.
FEATURE_COLUMNS = [
    'nb_tickets',
    'nb_purchases',
    'total_amount',
    'nb_suppliers',
    'vente_internet_max',
    'purchase_date_min',
    'purchase_date_max',
    'time_between_purchase',
    'nb_tickets_internet',
    'is_email_true',
    'opt_in',
    'gender_female',
    'gender_male',
    'gender_other',
    'nb_campaigns',
    'nb_campaigns_opened',
]

# Processing
X_test = dataset_test[FEATURE_COLUMNS]
y_test = dataset_test[['y_has_purchased']]

# Prediction: keep the second column of predict_proba
# (presumably the probability of the positive class -- confirm the model's
# class ordering).
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Add probability to dataset_test
dataset_test['Probability_to_buy'] = y_pred_prob
print('probability added to dataset_test')

print(dataset_test.head())