{ "cells": [ { "cell_type": "markdown", "id": "84b6e27e-4bda-4d38-8689-ec7fc0da1848", "metadata": {}, "source": [ "# Define segment and predict sales associated" ] }, { "cell_type": "markdown", "id": "ec059482-45d3-4ae6-99bc-9b4ced115db3", "metadata": {}, "source": [ "## Importations of packages " ] }, { "cell_type": "code", "execution_count": 70, "id": "9771bf29-d08e-4674-8c23-9a2672fbef8f", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from pandas import DataFrame\n", "import numpy as np\n", "import os\n", "import s3fs\n", "import re\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n", "from sklearn.utils import class_weight\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n", "from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n", "from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n", "from sklearn.naive_bayes import GaussianNB\n", "from scipy.optimize import fsolve\n", "import io\n", "\n", "import pickle\n", "import warnings" ] }, { "cell_type": "markdown", "id": "048fcd7c-800a-4a6b-b725-faf8410f924a", "metadata": {}, "source": [ "## load databases" ] }, { "cell_type": "code", "execution_count": 71, "id": "539ccbdf-f29f-4f04-99c1-8c88d0efe514", "metadata": {}, "outputs": [], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" ] }, { "cell_type": "code", "execution_count": 270, "id": "d6017ed0-6233-4888-85a7-05dec50a255b", "metadata": {}, "outputs": [], "source": [ "type_of_activity = \"musee\"" ] }, { "cell_type": "code", "execution_count": 73, "id": "0c3a6ddc-9345-4a42-b6bf-a20a95de3028", "metadata": {}, "outputs": [], "source": [ "def load_train_test(type_of_activity):\n", " BUCKET = f\"projet-bdc2324-team1/Generalization/{type_of_activity}\"\n", " File_path_train = BUCKET + \"/Train_set.csv\"\n", " File_path_test = BUCKET + \"/Test_set.csv\"\n", " \n", " with fs.open( File_path_train, mode=\"rb\") as file_in:\n", " dataset_train = pd.read_csv(file_in, sep=\",\")\n", " # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n", "\n", " with fs.open(File_path_test, mode=\"rb\") as file_in:\n", " dataset_test = pd.read_csv(file_in, sep=\",\")\n", " # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n", " \n", " return dataset_train, dataset_test" ] }, { "cell_type": "code", "execution_count": 271, "id": "2831d546-b365-498b-8248-c618bd9c3057", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1080/2350085345.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n", " dataset_train = pd.read_csv(file_in, sep=\",\")\n" ] }, { "data": { "text/plain": [ "customer_id 0\n", "nb_tickets 0\n", "nb_purchases 0\n", "total_amount 0\n", "nb_suppliers 0\n", "vente_internet_max 0\n", "purchase_date_min 0\n", "purchase_date_max 0\n", "time_between_purchase 0\n", "nb_tickets_internet 0\n", "street_id 0\n", "structure_id 389658\n", "mcp_contact_id 150354\n", "fidelity 0\n", "tenant_id 0\n", "is_partner 0\n", "deleted_at 434278\n", "gender 0\n", "is_email_true 0\n", "opt_in 0\n", "last_buying_date 183987\n", "max_price 183987\n", "ticket_sum 0\n", "average_price 94783\n", "average_purchase_delay 183987\n", "average_price_basket 183987\n", "average_ticket_basket 183987\n", "total_price 89204\n", "purchase_count 0\n", "first_buying_date 183987\n", "country 141237\n", "gender_label 0\n", "gender_female 0\n", "gender_male 0\n", "gender_other 0\n", "country_fr 141237\n", "nb_campaigns 0\n", "nb_campaigns_opened 0\n", "time_to_open 258182\n", "y_has_purchased 0\n", "dtype: int64" ] }, "execution_count": 271, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_train, dataset_test = load_train_test(type_of_activity)\n", "dataset_train.isna().sum()" ] }, { "cell_type": "code", "execution_count": 77, "id": "b8827f7b-b304-4f51-9814-c7a98ed88cf0", "metadata": {}, "outputs": [], "source": [ "def features_target_split(dataset_train, dataset_test):\n", " \n", " features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n", " 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n", " 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n", "\n", " # we suppress fidelity, time between purchase, and gender other (colinearity issue)\n", " \"\"\"\n", " features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', \n", " 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'is_email_true', \n", " 'opt_in', 'gender_female', 'gender_male', 'nb_campaigns', 'nb_campaigns_opened']\n", " \"\"\"\n", " \n", " X_train = dataset_train[features_l]\n", " y_train = dataset_train[['y_has_purchased']]\n", "\n", " X_test = dataset_test[features_l]\n", " y_test = dataset_test[['y_has_purchased']]\n", " return X_train, X_test, y_train, y_test" ] }, { "cell_type": "code", "execution_count": 272, "id": "c18195fc-ed40-4e39-a59e-c9ecc5a8e6c3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shape train : (434278, 17)\n", "Shape test : (186120, 17)\n" ] } ], "source": [ "X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)\n", "print(\"Shape train : \", X_train.shape)\n", "print(\"Shape test : \", X_test.shape)" ] }, { "cell_type": "markdown", "id": "74eda066-5e01-43aa-b0cf-cc6d9bbf770e", "metadata": {}, "source": [ "## get results from the logit cross validated model" ] }, { "cell_type": "code", "execution_count": 79, "id": "7c81390e-598c-4f02-bd56-dd03b00dcb33", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_opened
04.01.0100.001.00.05.1771875.1771870.0000000.01TrueFalse1000.00.0
11.01.055.001.00.0426.265613426.2656130.0000000.02TrueTrue0100.00.0
217.01.080.001.00.0436.033437436.0334370.0000000.02TrueTrue1000.00.0
34.01.0120.001.00.05.1964125.1964120.0000000.01TrueFalse1000.00.0
434.02.0416.001.00.0478.693148115.631470363.0616780.04TrueFalse1000.00.0
......................................................
960911.01.067.311.01.0278.442257278.4422570.0000001.02TrueFalse01015.05.0
960921.01.061.411.01.0189.207373189.2073730.0000001.01TrueFalse01012.09.0
960930.00.00.000.00.0550.000000550.000000-1.0000000.01TrueTrue10029.03.0
960941.01.079.431.01.0279.312905279.3129050.0000001.01TrueFalse01020.04.0
960950.00.00.000.00.0550.000000550.000000-1.0000000.02TrueFalse01031.04.0
\n", "

96096 rows × 17 columns

\n", "
" ], "text/plain": [ " nb_tickets nb_purchases total_amount nb_suppliers \\\n", "0 4.0 1.0 100.00 1.0 \n", "1 1.0 1.0 55.00 1.0 \n", "2 17.0 1.0 80.00 1.0 \n", "3 4.0 1.0 120.00 1.0 \n", "4 34.0 2.0 416.00 1.0 \n", "... ... ... ... ... \n", "96091 1.0 1.0 67.31 1.0 \n", "96092 1.0 1.0 61.41 1.0 \n", "96093 0.0 0.0 0.00 0.0 \n", "96094 1.0 1.0 79.43 1.0 \n", "96095 0.0 0.0 0.00 0.0 \n", "\n", " vente_internet_max purchase_date_min purchase_date_max \\\n", "0 0.0 5.177187 5.177187 \n", "1 0.0 426.265613 426.265613 \n", "2 0.0 436.033437 436.033437 \n", "3 0.0 5.196412 5.196412 \n", "4 0.0 478.693148 115.631470 \n", "... ... ... ... \n", "96091 1.0 278.442257 278.442257 \n", "96092 1.0 189.207373 189.207373 \n", "96093 0.0 550.000000 550.000000 \n", "96094 1.0 279.312905 279.312905 \n", "96095 0.0 550.000000 550.000000 \n", "\n", " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", "0 0.000000 0.0 1 True \n", "1 0.000000 0.0 2 True \n", "2 0.000000 0.0 2 True \n", "3 0.000000 0.0 1 True \n", "4 363.061678 0.0 4 True \n", "... ... ... ... ... \n", "96091 0.000000 1.0 2 True \n", "96092 0.000000 1.0 1 True \n", "96093 -1.000000 0.0 1 True \n", "96094 0.000000 1.0 1 True \n", "96095 -1.000000 0.0 2 True \n", "\n", " opt_in gender_female gender_male gender_other nb_campaigns \\\n", "0 False 1 0 0 0.0 \n", "1 True 0 1 0 0.0 \n", "2 True 1 0 0 0.0 \n", "3 False 1 0 0 0.0 \n", "4 False 1 0 0 0.0 \n", "... ... ... ... ... ... \n", "96091 False 0 1 0 15.0 \n", "96092 False 0 1 0 12.0 \n", "96093 True 1 0 0 29.0 \n", "96094 False 0 1 0 20.0 \n", "96095 False 0 1 0 31.0 \n", "\n", " nb_campaigns_opened \n", "0 0.0 \n", "1 0.0 \n", "2 0.0 \n", "3 0.0 \n", "4 0.0 \n", "... ... \n", "96091 5.0 \n", "96092 9.0 \n", "96093 3.0 \n", "96094 4.0 \n", "96095 4.0 \n", "\n", "[96096 rows x 17 columns]" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test" ] }, { "cell_type": "code", "execution_count": 80, "id": "c708f439-bb75-4688-bf4f-4c04e13deaae", "metadata": {}, "outputs": [], "source": [ "def load_model(type_of_activity, model):\n", " BUCKET = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n", " filename = model + '.pkl'\n", " file_path = BUCKET + filename\n", " with fs.open(file_path, mode=\"rb\") as f:\n", " model_bytes = f.read()\n", "\n", " model = pickle.loads(model_bytes)\n", " return model" ] }, { "cell_type": "code", "execution_count": 286, "id": "5261a803-05b8-41a0-968c-dc7bde48ddd3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
GridSearchCV(cv=3, error_score='raise',\n",
       "             estimator=Pipeline(steps=[('preprocessor',\n",
       "                                        ColumnTransformer(transformers=[('num',\n",
       "                                                                         Pipeline(steps=[('scaler',\n",
       "                                                                                          StandardScaler())]),\n",
       "                                                                         ['nb_tickets',\n",
       "                                                                          'nb_purchases',\n",
       "                                                                          'total_amount',\n",
       "                                                                          'nb_suppliers',\n",
       "                                                                          'vente_internet_max',\n",
       "                                                                          'purchase_date_min',\n",
       "                                                                          'purchase_date_max',\n",
       "                                                                          'time_between_purchase',\n",
       "                                                                          'nb_tickets_internet',\n",
       "                                                                          'nb_campaigns',\n",
       "                                                                          'nb_...\n",
       "       1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
       "       2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
       "       4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
       "       6.400000e+01]),\n",
       "                         'LogisticRegression_cv__class_weight': ['balanced',\n",
       "                                                                 {0.0: 0.5223906809346011,\n",
       "                                                                  1.0: 11.665359406898034}],\n",
       "                         'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
       "             scoring=make_scorer(recall_score, response_method='predict'))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GridSearchCV(cv=3, error_score='raise',\n", " estimator=Pipeline(steps=[('preprocessor',\n", " ColumnTransformer(transformers=[('num',\n", " Pipeline(steps=[('scaler',\n", " StandardScaler())]),\n", " ['nb_tickets',\n", " 'nb_purchases',\n", " 'total_amount',\n", " 'nb_suppliers',\n", " 'vente_internet_max',\n", " 'purchase_date_min',\n", " 'purchase_date_max',\n", " 'time_between_purchase',\n", " 'nb_tickets_internet',\n", " 'nb_campaigns',\n", " 'nb_...\n", " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n", " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n", " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n", " 6.400000e+01]),\n", " 'LogisticRegression_cv__class_weight': ['balanced',\n", " {0.0: 0.5223906809346011,\n", " 1.0: 11.665359406898034}],\n", " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n", " scoring=make_scorer(recall_score, response_method='predict'))" ] }, "execution_count": 286, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = load_model(type_of_activity, \"LogisticRegression_cv\")\n", "# model = load_model(type_of_activity, \"randomF_cv\")\n", "model" ] }, { "cell_type": "markdown", "id": "006819e7-e9c5-48d9-85ee-aa43d5e4c9c2", "metadata": {}, "source": [ "## Quartile clustering" ] }, { "cell_type": "code", "execution_count": 287, "id": "018d8ff4-3436-4eec-8507-d1a265cbabf1", "metadata": {}, "outputs": [], "source": [ "y_pred = model.predict(X_test)\n", "y_pred_prob = model.predict_proba(X_test)[:, 1]" ] }, { "cell_type": "code", "execution_count": 288, "id": "846f53b9-73c2-4a8b-9d9e-f11bf59ce9ba", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1080/375041546.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X_test_segment[\"has_purchased\"] = y_test\n", "/tmp/ipykernel_1080/375041546.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X_test_segment[\"has_purchased_estim\"] = y_pred\n", "/tmp/ipykernel_1080/375041546.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X_test_segment[\"score\"] = y_pred_prob\n", "/tmp/ipykernel_1080/375041546.py:6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X_test_segment[\"quartile\"] = np.where(X_test['score']<0.25, '1',\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelity...gender_femalegender_malegender_othernb_campaignsnb_campaigns_openedhas_purchasedhas_purchased_estimscorequartilescore_adjusted
02.01.022.01.01.0307.203553307.2035530.0000002.01...0010.00.00.00.00.36796120.010594
1269.08.050.02.01.0378.20809039.389595338.81849566.010...00165.01.01.01.00.99873140.397108
20.00.00.00.00.0550.000000550.000000-1.0000000.00...0104.02.00.00.00.21199710.014916
30.00.00.00.00.0550.000000550.000000-1.0000000.00...1002.00.00.00.00.24656310.024670
40.00.00.00.00.0550.000000550.000000-1.0000000.00...0014.00.00.00.00.10857510.025205
50.00.00.00.00.0550.000000550.000000-1.0000000.00...1007.00.00.00.00.25724420.046644
60.00.00.00.00.0550.000000550.000000-1.0000000.01...0102.00.00.00.00.20319610.023026
70.00.00.00.00.0550.000000550.000000-1.0000000.00...01010.08.00.00.00.24004910.003825
81.01.011.01.01.0456.255104456.2551040.0000001.01...0013.03.00.00.00.34009820.006850
90.00.00.00.00.0550.000000550.000000-1.0000000.00...01010.06.00.00.00.23447010.003745
\n", "

10 rows × 22 columns

\n", "
" ], "text/plain": [ " nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n", "0 2.0 1.0 22.0 1.0 1.0 \n", "1 269.0 8.0 50.0 2.0 1.0 \n", "2 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 \n", "5 0.0 0.0 0.0 0.0 0.0 \n", "6 0.0 0.0 0.0 0.0 0.0 \n", "7 0.0 0.0 0.0 0.0 0.0 \n", "8 1.0 1.0 11.0 1.0 1.0 \n", "9 0.0 0.0 0.0 0.0 0.0 \n", "\n", " purchase_date_min purchase_date_max time_between_purchase \\\n", "0 307.203553 307.203553 0.000000 \n", "1 378.208090 39.389595 338.818495 \n", "2 550.000000 550.000000 -1.000000 \n", "3 550.000000 550.000000 -1.000000 \n", "4 550.000000 550.000000 -1.000000 \n", "5 550.000000 550.000000 -1.000000 \n", "6 550.000000 550.000000 -1.000000 \n", "7 550.000000 550.000000 -1.000000 \n", "8 456.255104 456.255104 0.000000 \n", "9 550.000000 550.000000 -1.000000 \n", "\n", " nb_tickets_internet fidelity ... gender_female gender_male \\\n", "0 2.0 1 ... 0 0 \n", "1 66.0 10 ... 0 0 \n", "2 0.0 0 ... 0 1 \n", "3 0.0 0 ... 1 0 \n", "4 0.0 0 ... 0 0 \n", "5 0.0 0 ... 1 0 \n", "6 0.0 1 ... 0 1 \n", "7 0.0 0 ... 0 1 \n", "8 1.0 1 ... 0 0 \n", "9 0.0 0 ... 0 1 \n", "\n", " gender_other nb_campaigns nb_campaigns_opened has_purchased \\\n", "0 1 0.0 0.0 0.0 \n", "1 1 65.0 1.0 1.0 \n", "2 0 4.0 2.0 0.0 \n", "3 0 2.0 0.0 0.0 \n", "4 1 4.0 0.0 0.0 \n", "5 0 7.0 0.0 0.0 \n", "6 0 2.0 0.0 0.0 \n", "7 0 10.0 8.0 0.0 \n", "8 1 3.0 3.0 0.0 \n", "9 0 10.0 6.0 0.0 \n", "\n", " has_purchased_estim score quartile score_adjusted \n", "0 0.0 0.367961 2 0.010594 \n", "1 1.0 0.998731 4 0.397108 \n", "2 0.0 0.211997 1 0.014916 \n", "3 0.0 0.246563 1 0.024670 \n", "4 0.0 0.108575 1 0.025205 \n", "5 0.0 0.257244 2 0.046644 \n", "6 0.0 0.203196 1 0.023026 \n", "7 0.0 0.240049 1 0.003825 \n", "8 0.0 0.340098 2 0.006850 \n", "9 0.0 0.234470 1 0.003745 \n", "\n", "[10 rows x 22 columns]" ] }, "execution_count": 288, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_segment = X_test\n", "\n", "X_test_segment[\"has_purchased\"] = y_test\n", "X_test_segment[\"has_purchased_estim\"] = y_pred\n", "X_test_segment[\"score\"] = y_pred_prob\n", "X_test_segment[\"quartile\"] = np.where(X_test['score']<0.25, '1',\n", " np.where(X_test['score']<0.5, '2',\n", " np.where(X_test['score']<0.75, '3', '4')))\n", "X_test_segment.head(10)" ] }, { "cell_type": "code", "execution_count": 86, "id": "fb592fe3-ea40-4e83-8fe9-c52b9ee42f2a", "metadata": {}, "outputs": [], "source": [ "def df_segment(df, y, model) :\n", "\n", " y_pred = model.predict(df)\n", " y_pred_prob = model.predict_proba(df)[:, 1]\n", "\n", " df_segment = df\n", "\n", " df_segment[\"has_purchased\"] = y\n", " df_segment[\"has_purchased_estim\"] = y_pred\n", " df_segment[\"score\"] = y_pred_prob\n", " df_segment[\"quartile\"] = np.where(df_segment['score']<0.25, '1',\n", " np.where(df_segment['score']<0.5, '2',\n", " np.where(df_segment['score']<0.75, '3', '4')))\n", "\n", " return df_segment" ] }, { "cell_type": "code", "execution_count": 88, "id": "968645d5-58cc-485a-bd8b-99f4cfc26fec", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1080/2624515794.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_segment[\"has_purchased\"] = y\n", "/tmp/ipykernel_1080/2624515794.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_segment[\"has_purchased_estim\"] = y_pred\n", "/tmp/ipykernel_1080/2624515794.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_segment[\"score\"] = y_pred_prob\n", "/tmp/ipykernel_1080/2624515794.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_segment[\"quartile\"] = np.where(df_segment['score']<0.25, '1',\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelity...opt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_openedhas_purchasedhas_purchased_estimscorequartile
04.01.0100.001.00.05.1771875.1771870.0000000.01...False1000.00.00.00.00.0060661
11.01.055.001.00.0426.265613426.2656130.0000000.02...True0100.00.01.00.00.2888472
217.01.080.001.00.0436.033437436.0334370.0000000.02...True1000.00.00.00.00.1032641
34.01.0120.001.00.05.1964125.1964120.0000000.01...False1000.00.00.00.00.0089281
434.02.0416.001.00.0478.693148115.631470363.0616780.04...False1000.00.01.01.00.9928094
..................................................................
960911.01.067.311.01.0278.442257278.4422570.0000001.02...False01015.05.01.00.00.3517622
960921.01.061.411.01.0189.207373189.2073730.0000001.01...False01012.09.00.01.00.5678143
960930.00.00.000.00.0550.000000550.000000-1.0000000.01...True10029.03.00.00.00.0046521
960941.01.079.431.01.0279.312905279.3129050.0000001.01...False01020.04.00.00.00.2930422
960950.00.00.000.00.0550.000000550.000000-1.0000000.02...False01031.04.00.01.00.7878524
\n", "

96096 rows × 21 columns

\n", "
" ], "text/plain": [ " nb_tickets nb_purchases total_amount nb_suppliers \\\n", "0 4.0 1.0 100.00 1.0 \n", "1 1.0 1.0 55.00 1.0 \n", "2 17.0 1.0 80.00 1.0 \n", "3 4.0 1.0 120.00 1.0 \n", "4 34.0 2.0 416.00 1.0 \n", "... ... ... ... ... \n", "96091 1.0 1.0 67.31 1.0 \n", "96092 1.0 1.0 61.41 1.0 \n", "96093 0.0 0.0 0.00 0.0 \n", "96094 1.0 1.0 79.43 1.0 \n", "96095 0.0 0.0 0.00 0.0 \n", "\n", " vente_internet_max purchase_date_min purchase_date_max \\\n", "0 0.0 5.177187 5.177187 \n", "1 0.0 426.265613 426.265613 \n", "2 0.0 436.033437 436.033437 \n", "3 0.0 5.196412 5.196412 \n", "4 0.0 478.693148 115.631470 \n", "... ... ... ... \n", "96091 1.0 278.442257 278.442257 \n", "96092 1.0 189.207373 189.207373 \n", "96093 0.0 550.000000 550.000000 \n", "96094 1.0 279.312905 279.312905 \n", "96095 0.0 550.000000 550.000000 \n", "\n", " time_between_purchase nb_tickets_internet fidelity ... opt_in \\\n", "0 0.000000 0.0 1 ... False \n", "1 0.000000 0.0 2 ... True \n", "2 0.000000 0.0 2 ... True \n", "3 0.000000 0.0 1 ... False \n", "4 363.061678 0.0 4 ... False \n", "... ... ... ... ... ... \n", "96091 0.000000 1.0 2 ... False \n", "96092 0.000000 1.0 1 ... False \n", "96093 -1.000000 0.0 1 ... True \n", "96094 0.000000 1.0 1 ... False \n", "96095 -1.000000 0.0 2 ... False \n", "\n", " gender_female gender_male gender_other nb_campaigns \\\n", "0 1 0 0 0.0 \n", "1 0 1 0 0.0 \n", "2 1 0 0 0.0 \n", "3 1 0 0 0.0 \n", "4 1 0 0 0.0 \n", "... ... ... ... ... \n", "96091 0 1 0 15.0 \n", "96092 0 1 0 12.0 \n", "96093 1 0 0 29.0 \n", "96094 0 1 0 20.0 \n", "96095 0 1 0 31.0 \n", "\n", " nb_campaigns_opened has_purchased has_purchased_estim score \\\n", "0 0.0 0.0 0.0 0.006066 \n", "1 0.0 1.0 0.0 0.288847 \n", "2 0.0 0.0 0.0 0.103264 \n", "3 0.0 0.0 0.0 0.008928 \n", "4 0.0 1.0 1.0 0.992809 \n", "... ... ... ... ... \n", "96091 5.0 1.0 0.0 0.351762 \n", "96092 9.0 0.0 1.0 0.567814 \n", "96093 3.0 0.0 0.0 0.004652 \n", "96094 4.0 0.0 0.0 0.293042 \n", "96095 4.0 0.0 1.0 0.787852 \n", "\n", " quartile \n", "0 1 \n", "1 2 \n", "2 1 \n", "3 1 \n", "4 4 \n", "... ... \n", "96091 2 \n", "96092 3 \n", "96093 1 \n", "96094 2 \n", "96095 4 \n", "\n", "[96096 rows x 21 columns]" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_segment(X_test, y_test, model)" ] }, { "cell_type": "markdown", "id": "ad16b8ab-7e01-404b-971e-866e9b9d5aa4", "metadata": {}, "source": [ "## definition of functions to compute the bias of scores and adjust it \n", "\n", "Le biais est calculé de la façon suivante. \n", "En notant $\\hat{p(x_i)}$ le score calculé (estimé par la modélisation) et $p(x_i)$ le vrai score (sans biais), et $\\beta$ le logarithme du biais, on a : \\\n", "$\\ln{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}} = \\beta + \\ln{\\frac{p(x_i)}{1-p(x_i)}}$ \\\n", "$ \\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}} = \\exp(\\beta) . \\frac{p(x_i)}{1-p(x_i)} $ , soit : \\\n", "$p(x_i) = {\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}}$ \\\n", "Ce qu'on appelle biais et qu'on estime dans le code par la suite est : $B=\\exp(\\beta) $. Les probabilités ne sont donc pas biaisées si $B=1$. Il y a surestimation si $B>1$. \n", "\n", "On cherche le B qui permette d'ajuster les probabilités de telle sorte que la somme des scores soit égale à la somme des y_has_purchased. Cela revient à résoudre : \n", "\n", "\\begin{equation}\n", "\\sum_{i}{\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}} = \\sum_{i}{Y_i}\n", "\\end{equation}\n", "\n", "C'est ce que fait la fonction find_bias. \n", "\n", "Note sur les notations : \\\n", "$\\hat{p(x_i)}$ correspond à ce qu'on appelle le score et $p(x_i)$ à ce qu'on appellera le score adjusted" ] }, { "cell_type": "code", "execution_count": 89, "id": "f0379536-a6c5-4b16-bde5-d0319ec1b140", "metadata": {}, "outputs": [], "source": [ "# compute adjusted score from odd ratios (cf formula above)\n", "def adjusted_score(odd_ratio, bias) :\n", " adjusted_score = odd_ratio/(bias+odd_ratio)\n", " return adjusted_score" ] }, { "cell_type": "code", "execution_count": 90, "id": "32a0dfd0-f49d-4785-a56f-706d381bfe41", "metadata": {}, "outputs": [], "source": [ "# when the score is 1 we cannot compute the odd ratio, so we adjust scores equal to 1\n", "# we set the second best score instead\n", "\n", "def adjust_score_1(score) :\n", " second_best_score = np.array([element for element in score if element !=1]).max()\n", " new_score = np.array([element if element!=1 else second_best_score for element in score]) \n", " return new_score" ] }, { "cell_type": "code", "execution_count": 91, "id": "2dff1def-02df-413e-afce-b4aeaf7752b6", "metadata": {}, "outputs": [], "source": [ "def odd_ratio(score) :\n", " return score / (1 - score)" ] }, { "cell_type": "code", "execution_count": 92, "id": "683d71fc-7442-4028-869c-49c57592d6e9", "metadata": {}, "outputs": [], "source": [ "# definition of a function that automatically detects the bias\n", "\n", "def find_bias(odd_ratios, y_objective, initial_guess=6) :\n", " \"\"\"\n", " results = minimize(lambda bias : (sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective)**2 ,\n", " initial_guess , method = \"BFGS\")\n", "\n", " estimated_bias = results.x[0]\n", " \"\"\"\n", "\n", " # faster method\n", " bias_estimated = fsolve(lambda bias : sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective, x0=6)\n", " \n", " return bias_estimated[0]" ] }, { "cell_type": "code", "execution_count": 289, "id": "f17dc6ca-7a48-441b-8c04-11c47b8b9741", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.3940650533525649 0.04284869976359338\n" ] }, { "data": { "text/plain": [ "0.04286194557403322" ] }, "execution_count": 289, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(X_test_segment[\"score\"].mean(), y_test[\"y_has_purchased\"].mean())\n", "y_train[\"y_has_purchased\"].mean()" ] }, { "cell_type": "code", "execution_count": 290, "id": "781b0d40-c954-4c54-830a-e709c8667328", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "22.577005337484817" ] }, "execution_count": 290, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# computation with the function defined\n", "\n", "bias_test_set = find_bias(odd_ratios = odd_ratio(adjust_score_1(X_test_segment[\"score\"])), \n", " y_objective = y_test[\"y_has_purchased\"].sum(),\n", " initial_guess=6)\n", "bias_test_set" ] }, { "cell_type": "code", "execution_count": 291, "id": "248cb862-418e-4767-9933-70c4885ecf40", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "22.690061493186622" ] }, "execution_count": 291, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# comparison with bias of the train set\n", "X_train_score = model.predict_proba(X_train)[:, 1]\n", "\n", "bias_train_set = find_bias(odd_ratios = odd_ratio(adjust_score_1(X_train_score)), \n", " y_objective = y_train[\"y_has_purchased\"].sum(),\n", " initial_guess=6)\n", "bias_train_set" ] }, { "cell_type": "code", "execution_count": 292, "id": "fff6cbe6-7bb3-4732-9b81-b9ac5383bbcf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "betâ test - betâ train = -0.0049950835646278635\n" ] } ], "source": [ "print(\"betâ test - betâ train = \",np.log(bias_test_set/bias_train_set))" ] }, { "cell_type": "code", "execution_count": 293, "id": "f506870d-4a8a-4b2c-8f0b-e0789080b20c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean absolute erreur 0.00017894295558797563\n" ] } ], "source": [ "# impact of considering a bias computed on train set instead of test set - totally neglectable\n", "\n", "score_adjusted_test = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_test_set)\n", "score_adjusted_train = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_train_set)\n", "\n", "print(\"mean absolute erreur\",abs(score_adjusted_test-score_adjusted_train).mean())" ] }, { "cell_type": "code", "execution_count": 294, "id": "8213d0e4-063b-49fa-90b7-677fc34f4c01", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1080/1825363704.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X_test_segment[\"score_adjusted\"] = score_adjusted_train\n" ] } ], "source": [ "# adjust scores accordingly \n", "\n", "# X_test_segment[\"score_adjusted\"] = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_test_set)\n", "\n", "# actually, we are not supposed to have X_test, so the biais is estimated on X_train\n", "# X_test_segment[\"score_adjusted\"] = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_train_set)\n", "X_test_segment[\"score_adjusted\"] = score_adjusted_train" ] }, { "cell_type": "code", "execution_count": 295, "id": "834d3723-2e72-4c65-9c62-e2d595c69461", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE for score : 0.18391062438077188\n", "MSE for ajusted score : 0.037093800862222845\n", "sum of y_has_purchased : 7975.0\n", "sum of adjusted scores : 7941.695137104767\n" ] } ], "source": [ "# check \n", "\n", "MSE_score = ((X_test_segment[\"score\"]-X_test_segment[\"has_purchased\"])**2).mean()\n", "MSE_ajusted_score = ((X_test_segment[\"score_adjusted\"]-X_test_segment[\"has_purchased\"])**2).mean()\n", "print(f\"MSE for score : {MSE_score}\")\n", "print(f\"MSE for ajusted score : {MSE_ajusted_score}\")\n", "\n", "print(\"sum of y_has_purchased :\",y_test[\"y_has_purchased\"].sum())\n", "print(\"sum of adjusted scores :\", X_test_segment[\"score_adjusted\"].sum())" ] }, { "cell_type": "code", "execution_count": 296, "id": "9f30a4dd-a9d8-405a-a7d5-5324ae88cf70", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MAE for score : 0.38422988971624206\n", "MAE for adjusted score : 0.07284616452278603\n" ] } ], "source": [ "# mean absolute error - divided by 2 with out method\n", "\n", "MAE_score = abs(X_test_segment[\"score\"]-X_test_segment[\"has_purchased\"]).mean()\n", "MAE_ajusted_score = abs(X_test_segment[\"score_adjusted\"]-X_test_segment[\"has_purchased\"]).mean()\n", "print(f\"MAE for score : {MAE_score}\")\n", "print(f\"MAE for adjusted score : {MAE_ajusted_score}\")" ] }, { "cell_type": "code", "execution_count": 103, "id": "6f9396db-e213-408c-a596-eaeec3bc79f3", "metadata": {}, "outputs": [], "source": [ "# visualization\n", "\n", "# histogramme des probas et des probas ajustées\n", "\n", "def plot_hist_scores(df, score, score_adjusted, type_of_activity) :\n", "\n", " plt.figure()\n", " plt.hist(df[score], label = \"score\", alpha=0.6)\n", " plt.hist(df[score_adjusted], label=\"adjusted score\", alpha=0.6)\n", " plt.legend()\n", " plt.xlabel(\"probability of a future purchase\")\n", " plt.ylabel(\"count\")\n", " plt.title(f\"Comparison between score and adjusted score for {type_of_activity} companies\")\n", " # plt.show()" ] }, { "cell_type": "code", "execution_count": 64, "id": "def64c16-f4dd-493c-909c-d886d7f53947", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'projet-bdc2324-team1/Output_expected_CA/sport/hist_score_adjustedsport.png'" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "PATH + file_name + type_of_activity + \".png\"" ] }, { "cell_type": "code", "execution_count": 297, "id": "b478d40d-9677-4204-87bd-16fb0bc1fe9a", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAn4AAAHFCAYAAABsNfRLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwdklEQVR4nO3de1yO9/8H8Nfd+UC3dC4pNqWUY0MaOZYo52FtKYfMQihstq85zJjjbGxsNofRxjaHzdDK+VCRCNEwizIlowOhUp/fHx5dv12VVDq6X8/H435wX9f7uq739bmv+7refa7DrRBCCBARERHRS0+tthMgIiIioprBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFfFSF37nz5/H6NGj0axZM+jo6KBBgwZo3749lixZgnv37tV2etUuICAAtra2tZ1GqTZu3AiFQoHTp09X2TyjoqIwd+5cZGZmVtk8qX47fPgwFAoFDh8+XOPLqe7v3w8//ICVK1dWy7xtbW0REBBQLfOui65fv47+/fujcePGUCgUmDp1am2nRC+x7t27o3v37rW2fI1aW3I1W7duHYKCgmBvb48ZM2bA0dER+fn5OH36NNauXYvo6Gjs3LmzttOsVrNnz8aUKVNqO40aExUVhXnz5iEgIACNGjWq7XRIxVX39++HH35AQkICi5QqMG3aNJw8eRLr16+Hubk5LCwsajsleol99dVXtbr8l7Lwi46Oxrvvvos+ffpg165d0NbWlsb16dMHoaGhCA8Pr8UMq9fDhw+hp6eHV155pbZToZeMEAKPHz+Grq5ubadS5/H7V/0KCgrw5MkT2T6+MhISEtCxY0cMGjSoTuVFLydHR8daXf5Leap34cKFUCgU+Oabb0r94mlpaWHAgAHS+8LCQixZsgQtW7aEtrY2TE1NMWrUKNy8eVM2Xffu3eHk5ITo6Gh06dIFurq6sLW1xYYNGwAAe/bsQfv27aGnpwdnZ+cSxeXcuXOhUChw9uxZDBkyBAYGBlAqlXj77bdx584dWey2bdvg4eEBCwsL6OrqwsHBAe+//z5ycnJkcQEBAWjQoAEuXLgADw8PNGzYEL169ZLGFT/V9PPPP6NTp05QKpXQ09ND8+bNMWbMGFlMcnIy3n77bZiamkJbWxsODg5Yvnw5CgsLpZjr169DoVBg2bJlWLFiBZo1a4YGDRrA1dUVMTExZX08MhkZGRg9ejQaN24MfX19+Pj44O+//y4Rt3//fvTq1QsGBgbQ09ODm5sbDhw4IGvbGTNmAACaNWsGhUIhnXqbMWMGlEolCgoKpPjJkydDoVBg6dKl0rC7d+9CTU0Nq1atkoZlZ2dj+vTpaNasGbS0tGBlZYWpU6eW+ByEEPjqq6/Qtm1b6OrqwtDQEMOGDSuxLkXbUGxsLLp27Sp9Bp9++qmsfZ+lPJ9fZmYmQkND0bx5c2l77tevH/78808p5t69ewgKCoKVlRW0tLTQvHlzfPjhh8jNzZXNS6FQYNKkSVi7di0cHBygra2NTZs2AQCuXr0KX19f2Xby5ZdfPncdAODLL79Et27dYGpqCn19fTg7O2PJkiXIz8+vdHv9+eef6Nu3L/T09GBsbIwJEybg/v375crnr7/+wujRo9GiRQvo6enBysoKPj4+uHDhQonY8i6n+Pev6DuzcePGErEKhQJz586V3t+5cwfjx4+HtbU1tLW1YWJiAjc3N+zfv19qlz179uDGjRvStq5QKKTp8/LysGDBAmmfZmJigtGjR5fYz+Tn52PmzJkwNzeHnp4eXn/9dZw6dapcbQYAa9asQZs2bdCgQQM0bNgQLVu2xAcffCCL+eeff6R10dLSgqWlJYYNG4bbt29LMRXZ5yxZsgQLFixAs2bNoK2tjUOHDgEATp8+jQEDBqBx48bQ0dFBu3bt8NNPP5WZf9Ep+r/++gv79u2T2vH69etVlldpir5XGzZsgL29PXR1deHi4oKYmBgIIbB06VJpn9qzZ0/89ddfsumfdSq++CnEwsJCLFiwQFpGo0aN0Lp1a3z++eey6cr7XS7v/vBZwsPD0atXL2n/5eDggEWLFslifvvtN7i6ukJPTw8NGzZEnz59EB0dLYspOpaeP38eb7zxBpRKJRo3boyQkBA8efIEly9fRt++fdGwYUPY2tpiyZIlsumLPvctW7YgJCQE5ubm0NXVhbu7O86ePSuLPX36NEaOHAlbW1vpmP/mm2/ixo0bsriiy5cOHTqEd999F8bGxjAyMsKQIUNw69YtWWxpp3rL+509ePAgunfvDiMjI+jq6qJp06YYOnQoHj58WK7PAAAgXjJPnjwRenp6olOnTuWeZvz48QKAmDRpkggPDxdr164VJiYmwtraWty5c0eKc3d3F0ZGRsLe3l5899134o8//hDe3t4CgJg3b55wdnYWP/74o9i7d6/o3Lmz0NbWFv/88480/Zw5cwQAYWNjI2bMmCH++OMPsWLFCqGvry/atWsn8vLypNiPP/5YfPbZZ2LPnj3i8OHDYu3ataJZs2aiR48estz9/f2FpqamsLW1FYsWLRIHDhwQf/zxhzTOxsZGio2KihIKhUKMHDlS7N27Vxw8eFBs2LBB+Pn5STHp6enCyspKmJiYiLVr14rw8HAxadIkAUC8++67UlxSUpIAIGxtbUXfvn3Frl27xK5du4Szs7MwNDQUmZmZZbb5hg0bBABhbW0txowZI/bt2ye++eYbYWpqKqytrUVGRoYUu3nzZqFQKMSgQYPEjh07xO7du4W3t7dQV1cX+/fvF0IIkZKSIiZPniwAiB07dojo6GgRHR0tsrKyRHh4uAAgoqKipHm2bNlS6Orqij59+kjDtm3bJgCIS5cuCSGEyMnJEW3bthXGxsZixYoVYv/+/eLzzz8XSqVS9OzZUxQWFkrTBgYGCk1NTREaGirCw8PFDz/8IFq2bCnMzMxEWlpaiW2oRYsWYu3atSIyMlIEBQUJAGLTpk1ltll5Pr/s7GzRqlUroa+vL+bPny/++OMPsX37djFlyhRx8OBBIYQQjx49Eq1btxb6+vpi2bJlIiIiQsyePVtoaGiIfv36yZYJQFhZWYnWrVuLH374QRw8eFAkJCSIixcvCqVSKZydncX3338vIiIiRGhoqFBTUxNz584tcz2EEGLatGlizZo1Ijw8XBw8eFB89tlnwtjYWIwePVoWV972SktLE6ampsLKykps2LBB7N27V7z11luiadOmAoA4dOhQmfkcOXJEhIaGil9++UUcOXJE7Ny5UwwaNEjo6uqKP//8s1LLKf79K/rObNiwocTyAYg5c+ZI7z09PYWJiYn45ptvxOHDh8WuXbvERx99JLZu3SqEEOLixYvCzc1NmJubS9t6dHS0EEKIgoIC0bdvX6Gvry/mzZsnIiMjxbfffiusrKyEo6OjePjwoSxHhUIhZsyYISIiIsSKFSuElZWVMDAwEP7+/mW22Y8//igAiMmTJ4uIiAixf/9+sXbtWhEcHCzF3Lx5U1hYWMi+Q9u2bRNjxowRiYmJQoiK73OsrKxEjx49xC+//CIiIiJEUlKSOHjwoNDS0hJdu3YV27ZtE+Hh4SIgIOCZ7V0kKytLREdHC3Nzc+Hm5ia14+PHj6skr2cpOg506dJF7NixQ+zcuVPY2dmJxo0bi2nTpomBAweK33//XYSFhQkzMzPRunVr2f7Gxsam1M/H3d1duLu7S+8XLVok1NXVxZw5c8SBAwdEeHi4WLlypew7Wt7vckX2h6X59ttvhUKhEN27dxc//PCD2L9/v/jqq69EUFCQFBMWFiYACA8PD7Fr1y6xbds20aFDB6GlpSWOHTsmxRUdS+3t7cXHH38sIiMjxcyZM6XjeMuWLcUXX3whIiMjxejRowUAsX37dmn6Q4cOScefgQMHit27d4stW7aIV199VRgYGIhr165JsT///LP46KOPxM6dO8WRI0fE1q1bhbu7uzAxMZHVB0XHtObNm4vJkyeLP/74Q3z77bfC0NCwxHG7+OdU3u9sUlKS0NHREX369BG7du0Shw8fFmFhYcLPz092zHyel67wS0tLEwDEyJEjyxWfmJgoAMg2PiGEOHnypAAgPvjgA2mYu7u7ACBOnz4tDbt7965QV1cXurq6siIvPj5eABBffPGFNKxoY502bZpsWUUb+5YtW0rNsbCwUOTn54sjR44IAOLcuXPSOH9/fwFArF+/vsR0xQ88y5YtEwDKLMref/99AUCcPHlSNvzdd98VCoVCXL58WQjx/zs7Z2dn8eTJEynu1KlTAoD48ccfn7kMIf7/SzJ48GDZ8BMnTggAYsGCBUKIpzubxo0bCx8fH1lcQUGBaNOmjejYsaM0bOnSpQJAiR1uTk6O0NLSEvPnzxdCPD0YARDvvfee0NXVFY8fPxZCPC3eLC0tpekWLVok1NTURGxsrGx+v/zyiwAg9u7dK4QQIjo6WgAQy5cvl8WlpKQIXV1dMXPmTGlY0TZUvH0dHR2Fp6dnmW1Wns9v/vz5AoCIjIx8ZszatWsFAPHTTz/Jhi9evFgAEBEREdIwAEKpVIp79+7JYj09PUWTJk1EVlaWbPikSZOEjo5OifiyFBQUiPz8fPH9998LdXV12bTlba/33ntPKBQKER8fL4vr06dPuQq/4p48eSLy8vJEixYtZN/XiiznRQq/Bg0aiKlTp5aZY//+/WXzL1JUkP33QCeEELGxsQKA+Oqrr4QQ/7/ve9b+6HmF36RJk0SjRo3KjBkzZozQ1NSU/pgqTUX3Oa+88orsj2Qhnv4h165dO5Gfny8b7u3tLSwsLERBQUGZedrY2Ij+/ftXeV7PAkCYm5uLBw8eSMN27dolAIi2bdvKiqiVK1cKAOL8+fOyfMtT+Hl7e4u2bduWmUt5v8vl3R+W5v79+8LAwEC8/vrrzywQCwoKhKWlpXB2dpZ9Xvfv3xempqaiS5cu0rCiY2nxfW7btm2lP/6L5OfnCxMTEzFkyBBpWFHh1759e1k+169fF5qammLcuHHPXJcnT56IBw8eCH19ffH5559Lw4uOacVriSVLlggAIjU1VRpW/HMq73e2qK2L738q6qU81VsRRd3xxbvNO3bsCAcHB9npRACwsLBAhw4dpPeNGzeGqakp2rZtC0tLS2m4g4MDAJToDgaAt956S/Z++PDh0NDQkJ0a+Pvvv+Hr6wtzc3Ooq6tDU1MT7u7uAIDExMQS8xw6dOhz1/W1116TlvfTTz/hn3/+KRFz8OBBODo6omPHjrLhAQEBEELg4MGDsuH9+/eHurq69L5169YASl/v0hRviy5dusDGxkZqi6ioKNy7dw/+/v548uSJ9CosLETfvn0RGxv73NMMenp6cHV1lU6TRUZGolGjRpgxYwby8vJw/PhxAE9PJ/fu3Vua7vfff4eTkxPatm0rW7anp6fsDs7ff/8dCoUCb7/9tizO3Nwcbdq0KXFHqbm5eYn2bd269XPbrDyf3759+2BnZydbj+IOHjwIfX19DBs2TDa86DtQfJvv2bMnDA0NpfePHz/GgQMHMHjwYOjp6cnWuV+/fnj8+PFzT/efPXsWAwYMgJGRkbR9jxo1CgUFBbhy5YostjztdejQIbRq1Qpt2rSRxfn6+paZR5EnT55g4cKFcHR0hJaWFjQ0NKClpYWrV6/Kvm8vupzy6tixIzZu3IgFCxYgJiamxCnwsvz+++9o1KgRfHx8ZJ9N27ZtYW5uLm2PRd+xZ+2PypNjZmYm3nzzTfz666/4999/S8Ts27cPPXr0kPaHpanoPmfAgAHQ1NSU3v/111/4888/pfUovj2mpqbi8uXLz12fqs7reXr06AF9fX3pfVEbeXl5yU7bl3UseZ6OHTvi3LlzCAoKwh9//IHs7GzZ+Ip8l8u7PyxNVFQUsrOzERQUJFu3/7p8+TJu3boFPz8/qKn9f2nSoEEDDB06FDExMSVOZ3p7e8veOzg4QKFQwMvLSxqmoaGBV199tdT28/X1leVjY2ODLl26yI7FDx48wHvvvYdXX30VGhoa0NDQQIMGDZCTk1Pqsfi/l5EB5Tsmlvc727ZtW2hpaWH8+PHYtGlTqZdFlcdLV/gZGxtDT08PSUlJ5Yq/e/cuAJR6F5elpaU0vkjjxo1LxGlpaZUYrqWlBeDpF6s4c3Nz2XsNDQ0YGRlJy3rw4AG6du2KkydPYsGCBTh8+DBiY2OxY8cOAMCjR49k0+vp6cHAwKDM9QSAbt26YdeuXXjy5AlGjRqFJk2awMnJCT/++KMUc/fu3We2RdH4/zIyMpK9L7qmsniOz1K8LYqGFS2n6DqgYcOGQVNTU/ZavHgxhBDlejRP7969ERMTg5ycHOzfvx89e/aEkZEROnTogP379yMpKQlJSUmygun27ds4f/58ieU2bNgQQgjpQHf79m0IIWBmZlYiNiYmpsQBsXibFbXb89qsPJ/fnTt30KRJkzLnc/fuXZibm5fYAZuamkJDQ6PEZ1x8e7h79y6ePHmCVatWlVjffv36AUCpRUCR5ORkdO3aFf/88w8+//xzHDt2DLGxsdI1RcXboTztVbROxZU2rDQhISGYPXs2Bg0ahN27d+PkyZOIjY1FmzZtqnQ55bVt2zb4+/vj22+/haurKxo3boxRo0YhLS3tudPevn0bmZmZ0NLSKvH5pKWlSZ9N0ef8rP3R8/j5+WH9+vW4ceMGhg4dClNTU3Tq1AmRkZFSTHm3x4rsc4rHFu0jpk+fXmJ9g4KCAJS9PVZXXs/zrGNGRY4lzzNr1iwsW7YMMTEx8PLygpGREXr16iU9Rqsi3+Xy7g9LU3SdWlnbwvOOxYWFhcjIyJANL62t9PT0oKOjU2J4eY7FRcP++9n6+vpi9erVGDduHP744w+cOnUKsbGxMDExKXWfXZljYnm/s6+88gr2798PU1NTTJw4Ea+88gpeeeWVEtdsPs9Ld1evuro6evXqhX379uHmzZvP3ekUfUipqaklYm/dugVjY+MqzzEtLQ1WVlbS+ydPnuDu3btSLgcPHsStW7dw+PBhqZcPwDOfT/esv6BKM3DgQAwcOBC5ubmIiYnBokWL4OvrC1tbW7i6usLIyAipqaklpiu6OLWq26O0A1laWhpeffVV2fJWrVqFzp07lzoPMzOz5y6nV69emD17No4ePYoDBw5gzpw50vCIiAg0a9ZMel/E2NgYurq6WL9+fanzLMrN2NgYCoUCx44dK/Vmoqq8s+95n5+JiUmJm5KKMzIywsmTJyGEkG076enpePLkSYnPuPj2ZWhoCHV1dfj5+WHixImlLqOoPUuza9cu5OTkYMeOHbCxsZGGx8fHl5l3WYyMjJ65LZXHli1bMGrUKCxcuFA2/N9//5U9GuhFllN0MCp+A03xAgJ4uk2tXLkSK1euRHJyMn777Te8//77SE9Pf+4TCYouKn9WXMOGDaV1Kcq9tP1ReYwePRqjR49GTk4Ojh49ijlz5sDb2xtXrlyBjY1NubfHiuxzim+PReNnzZqFIUOGlLoMe3v7cq1PVeZVnXR0dEpsR8DT7fW/eWloaCAkJAQhISHIzMzE/v378cEHH8DT0xMpKSkV+i6Xd39YGhMTEwAoc1v477G4uFu3bkFNTU125qEqPOu7XJRLVlYWfv/9d8yZMwfvv/++FJObm1ulzwIu73cWALp27YquXbuioKAAp0+fxqpVqzB16lSYmZlh5MiR5VreS9fjBzzdAQghEBgYiLy8vBLj8/PzsXv3bgBPT2MBT3f8/xUbG4vExERZIVBVwsLCZO9/+uknPHnyRLrLp2gHUrxg+Prrr6ssB21tbbi7u2Px4sUAIN3J1KtXL1y6dAlnzpyRxX///fdQKBTo0aNHleUAlGyLqKgo3LhxQ2oLNzc3NGrUCJcuXYKLi0upr6K/iMv6y6pjx44wMDDAypUrkZaWhj59+gB42hN49uxZ/PTTT3B0dJSdrvf29sa1a9dgZGRU6nKL7tj09vaGEAL//PNPqXHOzs5V2mZF61ra5+fl5YUrV66UOA31X7169cKDBw+wa9cu2fDvv/9eGl8WPT099OjRA2fPnkXr1q1LXeeyeoxK276FEFi3bl2Zyy1Ljx49cPHiRZw7d042/IcffijX9AqFosT3bc+ePSVOp7/IcszMzKCjo4Pz58/Lhv/6669lTte0aVNMmjQJffr0kX0vn9VL7O3tjbt376KgoKDUz6aoCCr6jj1rf1QR+vr68PLywocffoi8vDxcvHgRwNPt8dChQ2Wean3RfY69vT1atGiBc+fOPXMf8d8DZ3nV9L6wImxtbUtsR1euXCmznRs1aoRhw4Zh4sSJuHfvHq5fv16h73J594el6dKlC5RKJdauXQshRKkx9vb2sLKywg8//CCLycnJwfbt26U7favSjz/+KFvWjRs3EBUVJTsWCyFK7Bu+/fZb2VMiXlR5v7P/pa6ujk6dOklnSopvp2V56Xr8AMDV1RVr1qxBUFAQOnTogHfffRetWrVCfn4+zp49i2+++QZOTk7w8fGBvb09xo8fj1WrVkFNTQ1eXl64fv06Zs+eDWtra0ybNq3K89uxYwc0NDTQp08fXLx4EbNnz0abNm0wfPhwAE+/JIaGhpgwYQLmzJkDTU1NhIWFlTjYVNRHH32EmzdvolevXmjSpAkyMzPx+eefy64fnDZtGr7//nv0798f8+fPh42NDfbs2YOvvvoK7777Luzs7F54/f/r9OnTGDduHN544w2kpKTgww8/hJWVlXSKpkGDBli1ahX8/f1x7949DBs2DKamprhz5w7OnTuHO3fuYM2aNQAgFViff/45/P39oampCXt7ezRs2BDq6upwd3fH7t270axZM+kZa25ubtDW1saBAwcQHBwsy23q1KnYvn07unXrhmnTpqF169YoLCxEcnIyIiIiEBoaik6dOsHNzQ3jx4/H6NGjcfr0aXTr1g36+vpITU3F8ePH4ezsjHffffeF26o8n9/UqVOxbds2DBw4EO+//z46duyIR48e4ciRI/D29kaPHj0watQofPnll/D398f169fh7OyM48ePY+HChejXr1+Z1wcW+fzzz/H666+ja9euePfdd2Fra4v79+/jr7/+wu7du8ssPPv06QMtLS28+eabmDlzJh4/fow1a9aUOI1TEVOnTsX69evRv39/LFiwAGZmZggLC5M9wqYs3t7e2LhxI1q2bInWrVsjLi4OS5cuLXEW4EWWU3Qd6Pr16/HKK6+gTZs2OHXqVImiMSsrCz169ICvry9atmyJhg0bIjY2FuHh4bIeLWdnZ+zYsQNr1qxBhw4doKamBhcXF4wcORJhYWHo168fpkyZgo4dO0JTUxM3b97EoUOHMHDgQAwePBgODg54++23sXLlSmhqaqJ3795ISEjAsmXLynXpSGBgIHR1deHm5gYLCwukpaVh0aJFUCqV0vWo8+fPx759+9CtWzd88MEHcHZ2RmZmJsLDwxESEoKWLVtWyT7n66+/hpeXFzw9PREQEAArKyvcu3cPiYmJOHPmDH7++efnzqO4mt4XVoSfnx/efvttBAUFYejQobhx4waWLFki9awV8fHxgZOTE1xcXGBiYoIbN25g5cqVsLGxQYsWLQCU/7tc3v1haRo0aIDly5dj3Lhx6N27NwIDA2FmZoa//voL586dw+rVq6GmpoYlS5bgrbfegre3N9555x3k5uZi6dKlyMzMxKefflrl7Zieno7BgwcjMDAQWVlZmDNnDnR0dDBr1iwAgIGBAbp164alS5fC2NgYtra2OHLkCL777rsq/ZGA8n5n165di4MHD6J///5o2rQpHj9+LPXAlme/LXmhW0PquPj4eOHv7y+aNm0qtLS0pMemfPTRRyI9PV2KKygoEIsXLxZ2dnZCU1NTGBsbi7ffflukpKTI5ufu7i5atWpVYjml3REmxNM7tyZOnCi9L7oTKS4uTvj4+IgGDRqIhg0bijfffFPcvn1bNm1UVJRwdXUVenp6wsTERIwbN06cOXOmxF2B/v7+Ql9fv9T1L35X4e+//y68vLyElZWV0NLSEqampqJfv36y2+SFEOLGjRvC19dXGBkZCU1NTWFvby+WLl0qu9Oq6E62pUuXlrre/71DsTRFd0BFREQIPz8/0ahRI6Grqyv69esnrl69WiL+yJEjon///qJx48ZCU1NTWFlZif79+4uff/5ZFjdr1ixhaWkp1NTUStxl+fnnnwsAIjAwUDZN0R2Zv/32W4nlPnjwQPzvf/8T9vb2QktLS3rswbRp02SPaRFCiPXr14tOnToJfX19oaurK1555RUxatQo2V3gz9qGin9WpSnv55eRkSGmTJkimjZtKjQ1NYWpqano37+/7LEkd+/eFRMmTBAWFhZCQ0ND2NjYiFmzZkl3OBcpvg3/V1JSkhgzZoywsrISmpqawsTERHTp0kW6I7ssu3fvFm3atBE6OjrCyspKzJgxQ+zbt6/EZ1aR9rp06ZLo06eP0NHREY0bNxZjx44Vv/76a7nu6s3IyBBjx44VpqamQk9PT7z++uvi2LFjJe6+q8hy/P39ha2trWzarKwsMW7cOGFmZib09fWFj4+PuH79uuw78/jxYzFhwgTRunVrYWBgIHR1dYW9vb2YM2eOyMnJkeZ17949MWzYMNGoUSOhUCjEf3fn+fn5YtmyZVIbN2jQQLRs2VK88847su9Xbm6uCA0NFaampkJHR0d07txZREdHP/Ou0f/atGmT6NGjhzAzMxNaWlrC0tJSDB8+XHb3qRBP724fM2aMMDc3F5qamlLcf/d5L7rPEUKIc+fOieHDhwtTU1OhqakpzM3NRc+ePcXatWvLXA8hnr0Pr4q8SlPa9+pZ8ym6A/W/+7rCwkKxZMkS0bx5c6GjoyNcXFzEwYMHS2yvy5cvF126dBHGxsZCS0tLNG3aVIwdO1Zcv369xLLL812uyP6wNHv37hXu7u5CX19f6OnpCUdHR7F48WJZzK5du0SnTp2Ejo6O0NfXF7169RInTpyQxRQdS//7OBUhnn08LL4fKWrTzZs3i+DgYGFiYiK0tbVF165dZftrIZ4+BWLo0KHC0NBQNGzYUPTt21ckJCSU+I4UHdOK3/VctKzi+7Xi+5XyfGejo6PF4MGDhY2NjdDW1hZGRkbC3d291GNXWRRCPKPflarc3LlzMW/ePNy5c6darh0korpj8ODBSElJqdLfoyaiF3f48GH06NEDP//8c4mnG6iCl/IaPyKi2pKcnIytW7fi0KFDcHV1re10iIhkWPgREVWh9evXY8KECejZs6d09zgRUV3BU71EREREKoI9fkREREQqgoUfERERkYpg4UdERESkIl7KBzjXZYWFhbh16xYaNmxYoz/xQ0RERJUnhMD9+/dhaWkJNbX622/Gwq+G3bp1C9bW1rWdBhEREVVCSkpKiV/1qU9Y+NWwot+MTElJKdfPIhEREVHty87OhrW1daV++7kuYeFXw4pO7xoYGLDwIyIiqmfq+2Va9fckNRERERFVCAs/IiIiIhXBwo+IiIhIRfAaPyIiqncKCwuRl5dX22nQS0RTUxPq6uq1nUa1Y+FHRET1Sl5eHpKSklBYWFjbqdBLplGjRjA3N6/3N3CUhYUfERHVG0IIpKamQl1dHdbW1vX6QbpUdwgh8PDhQ6SnpwMALCwsajmj6sPCj4iI6o0nT57g4cOHsLS0hJ6eXm2nQy8RXV1dAEB6ejpMTU1f2tO+/FOJiIjqjYKCAgCAlpZWLWdCL6OiPyby8/NrOZPqw8KPiIjqnZf5GiyqPaqwXbHwIyIiIlIRLPyIiIiIVARv7iAionpv1o4LNbq8RUOca3R5RFWFPX5EREQvkZf5xgR6cSz8iIiIasAvv/wCZ2dn6OrqwsjICL1790ZOTg4AYP369WjVqhW0tbVhYWGBSZMmSdMlJydj4MCBaNCgAQwMDDB8+HDcvn1bGj937ly0bdsW69evR/PmzaGtrQ0hBLKysjB+/HiYmprCwMAAPXv2xLlz52p8valuqdXC7+jRo/Dx8YGlpSUUCgV27dolG69QKEp9LV26VIrp3r17ifEjR46UzScjIwN+fn5QKpVQKpXw8/NDZmamLCY5ORk+Pj7Q19eHsbExgoODS/wc0IULF+Du7g5dXV1YWVlh/vz5EEJUaZsQEdHLJzU1FW+++SbGjBmDxMREHD58GEOGDIEQAmvWrMHEiRMxfvx4XLhwAb/99hteffVVAE8fLDxo0CDcu3cPR44cQWRkJK5du4YRI0bI5v/XX3/hp59+wvbt2xEfHw8A6N+/P9LS0rB3717ExcWhffv26NWrF+7du1fTq091SK1e45eTk4M2bdpg9OjRGDp0aInxqampsvf79u3D2LFjS8QGBgZi/vz50vuihzAW8fX1xc2bNxEeHg4AGD9+PPz8/LB7924AT58L1b9/f5iYmOD48eO4e/cu/P39IYTAqlWrAADZ2dno06cPevTogdjYWFy5cgUBAQHQ19dHaGjoizdGVdg9pbYzqDifz2s7AyKiapeamoonT55gyJAhsLGxAQA4Oz+9TnDBggUIDQ3FlCn/vw9/7bXXAAD79+/H+fPnkZSUBGtrawDA5s2b0apVK8TGxkpxeXl52Lx5M0xMTAAABw8exIULF5Ceng5tbW0AwLJly7Br1y788ssvGD9+fM2sONU5tVr4eXl5wcvL65njzc3NZe9//fVX9OjRA82bN5cN19PTKxFbJDExEeHh4YiJiUGnTp0AAOvWrYOrqysuX74Me3t7RERE4NKlS0hJSYGlpSUAYPny5QgICMAnn3wCAwMDhIWF4fHjx9i4cSO0tbXh5OSEK1euYMWKFQgJCVGJZ/8QEVHltGnTBr169YKzszM8PT3h4eGBYcOGIT8/H7du3UKvXr1KnS4xMRHW1tZS0QcAjo6OaNSoERITE6XCz8bGRir6ACAuLg4PHjyAkZGRbH6PHj3CtWvXqmENqb6oN9f43b59G3v27MHYsWNLjAsLC4OxsTFatWqF6dOn4/79+9K46OhoKJVKqegDgM6dO0OpVCIqKkqKcXJykoo+APD09ERubi7i4uKkGHd3d+kvp6KYW7du4fr168/MOzc3F9nZ2bIXERGpFnV1dURGRmLfvn1wdHTEqlWrYG9vL7tWrzRCiFI7FooP19fXl40vLCyEhYUF4uPjZa/Lly9jxowZVbNSVC/Vm8e5bNq0CQ0bNsSQIUNkw9966y00a9YM5ubmSEhIwKxZs3Du3DlERkYCANLS0mBqalpifqampkhLS5NizMzMZOMNDQ2hpaUli7G1tZXFFE2TlpaGZs2alZr3okWLMG/evIqvMBERvVQUCgXc3Nzg5uaGjz76CDY2NoiMjIStrS0OHDiAHj16lJjG0dERycnJSElJkXr9Ll26hKysLDg4ODxzWe3bt0daWho0NDRKHLtItdWbwm/9+vV46623oKOjIxseGBgo/d/JyQktWrSAi4sLzpw5g/bt2wMo/SdYiv+1VJmYohs7yjrNO2vWLISEhEjvs7OzZV32RET08jt58iQOHDgADw8PmJqa4uTJk7hz5w4cHBwwd+5cTJgwAaampvDy8sL9+/dx4sQJTJ48Gb1790br1q3x1ltvYeXKlXjy5AmCgoLg7u4OFxeXZy6vd+/ecHV1xaBBg7B48WLY29vj1q1b2Lt3LwYNGlTmtPRyqxeF37Fjx3D58mVs27btubHt27eHpqYmrl69ivbt28Pc3LzUrvQ7d+5IPXbm5uY4efKkbHxGRgby8/NlMUW9f0XS09MBoERv4X9pa2vLTg8TEZHqMTAwwNGjR7Fy5UpkZ2fDxsYGy5cvl65zf/z4MT777DNMnz4dxsbGGDZsGABIT7yYPHkyunXrBjU1NfTt21e68fBZFAoF9u7diw8//BBjxozBnTt3YG5ujm7dupV5zKKXX70o/L777jt06NABbdq0eW7sxYsXkZ+fDwsLCwCAq6srsrKycOrUKXTs2BHA07+8srKy0KVLFynmk08+QWpqqjRdREQEtLW10aFDBynmgw8+QF5eHrS0tKQYS0tLdqMTEdWyuv5LGg4ODtKTJUrzzjvv4J133il1XNOmTfHrr78+c9q5c+di7ty5JYY3bNgQX3zxBb744osK50svr1q9uePBgwfSBacAkJSUhPj4eCQnJ0sx2dnZ+PnnnzFu3LgS01+7dg3z58/H6dOncf36dezduxdvvPEG2rVrBzc3NwBPv2x9+/ZFYGAgYmJiEBMTg8DAQHh7e8Pe3h4A4OHhAUdHR/j5+eHs2bM4cOAApk+fjsDAQBgYGAB4+kgYbW1tBAQEICEhATt37sTChQt5Ry8RERHVG7Va+J0+fRrt2rVDu3btAAAhISFo164dPvroIylm69atEELgzTffLDG9lpYWDhw4AE9PT9jb2yM4OBgeHh7Yv38/1NXVpbiwsDA4OzvDw8MDHh4eaN26NTZv3iyNV1dXx549e6CjowM3NzcMHz4cgwYNwrJly6QYpVKJyMhI3Lx5Ey4uLggKCkJISIjs+j0iIiKiukwh+NMTNSo7OxtKpRJZWVlSb2KV4QOciegl9/jxYyQlJaFZs2YlbvYjelFlbV/VevyuQfXmOX5ERERE9GJY+BERERGpCBZ+RERERCqChR8RERGRimDhR0RERKQiWPgRERERqQgWfkRERHXQ9evXoVAopB85KP6+Ltm4cSMaNWpU22lQOdSLn2wjIiIqU00/x7QWnkFqbW2N1NRUGBsbV8n8Nm7ciKlTpyIzM7NK5kf1A3v8iIiI6gF1dXWYm5tDQ4N9Ns+Tl5dX2ynUWSz8iIiIqll4eDhef/11NGrUCEZGRvD29sa1a9dkMadOnUK7du2go6MDFxcXnD17Vja++Kne0k6v7tq1S/b78efOnUOPHj3QsGFDGBgYoEOHDjh9+jQOHz6M0aNHIysrCwqFAgqFAnPnzgXwtGiaOXMmrKysoK+vj06dOuHw4cOy5WzcuBFNmzaFnp4eBg8ejLt375a5/nl5eZg0aRIsLCygo6MDW1tbLFq0SBqfmZmJ8ePHw8zMDDo6OnBycsLvv/8ujd++fTtatWoFbW1t2NraYvny5bL529raYsGCBQgICIBSqURgYCAAICoqCt26dYOuri6sra0RHByMnJycMnN92bHwIyIiqmY5OTkICQlBbGwsDhw4ADU1NQwePBiFhYXSeG9vb9jb2yMuLg5z587F9OnTX3i5b731Fpo0aYLY2FjExcXh/fffh6amJrp06YKVK1fCwMAAqampSE1NlZY3evRonDhxAlu3bsX58+fxxhtvoG/fvrh69SoA4OTJkxgzZgyCgoIQHx+PHj16YMGCBWXm8cUXX+C3337DTz/9hMuXL2PLli2wtbUFABQWFsLLywtRUVHYsmULLl26hE8//RTq6uoAgLi4OAwfPhwjR47EhQsXMHfuXMyePRsbN26ULWPp0qVwcnJCXFwcZs+ejQsXLsDT0xNDhgzB+fPnsW3bNhw/fhyTJk164Xatz9hfTEREVM2GDh0qe//dd9/B1NQUly5dgpOTE8LCwlBQUID169dDT08PrVq1ws2bN/Huu+++0HKTk5MxY8YMtGzZEgDQokULaZxSqYRCoYC5ubk07Nq1a/jxxx9x8+ZNWFpaAgCmT5+O8PBwbNiwAQsXLsTnn38OT09PvP/++wAAOzs7REVFITw8vMw8WrRogddffx0KhQI2NjbSuP379+PUqVNITEyEnZ0dAKB58+bS+BUrVqBXr16YPXu2tLxLly5h6dKlCAgIkOJ69uwpK5ZHjRoFX19fTJ06VVr3L774Au7u7lizZo3K/tYze/yIiIiq2bVr1+Dr64vmzZvDwMAAzZo1A/C0IAKAxMREtGnTBnp6etI0rq6uL7zckJAQjBs3Dr1798ann35a4vRycWfOnIEQAnZ2dmjQoIH0OnLkiDRtYmJiidyel2tAQADi4+Nhb2+P4OBgRERESOPi4+PRpEkTqegrLjExEW5ubrJhbm5uuHr1KgoKCqRhLi4uspi4uDhs3LhRth6enp4oLCxEUlJSmfm+zNjjR0REVM18fHxgbW2NdevWwdLSEoWFhXBycpJuQhBCVHieampqJabLz8+XvZ87dy58fX2xZ88e7Nu3D3PmzMHWrVsxePDgUudZWFgIdXV1xMXFSadaizRo0KDSubZv3x5JSUnYt28f9u/fj+HDh6N379745ZdfoKurW+a0QgjZdYvPykFfX7/EurzzzjsIDg4uEdu0adMKr8PLgoUfERFRNbp79y4SExPx9ddfo2vXrgCA48ePy2IcHR2xefNmPHr0SCqEYmJiypyviYkJ7t+/j5ycHKnoKe0Zf3Z2drCzs8O0adPw5ptvYsOGDRg8eDC0tLRkPWYA0K5dOxQUFCA9PV3KtThHR8cSuT0vVwAwMDDAiBEjMGLECAwbNgx9+/bFvXv30Lp1a9y8eRNXrlwptdfP0dGxRHtFRUXBzs6uRHH6X+3bt8fFixfx6quvPjc3VcJTvURERNXI0NAQRkZG+Oabb/DXX3/h4MGDCAkJkcX4+vpCTU0NY8eOxaVLl7B3714sW7aszPl26tQJenp6+OCDD/DXX3/hhx9+kN3w8OjRI0yaNAmHDx/GjRs3cOLECcTGxsLBwQHA0zthHzx4gAMHDuDff//Fw4cPYWdnh7feegujRo3Cjh07kJSUhNjYWCxevBh79+4FAAQHByM8PBxLlizBlStXsHr16jKv7wOAzz77DFu3bsWff/6JK1eu4Oeff4a5uTkaNWoEd3d3dOvWDUOHDkVkZKTUM1g0z9DQUBw4cAAff/wxrly5gk2bNmH16tXPvfnlvffeQ3R0NCZOnIj4+HhcvXoVv/32GyZPnlzmdC87Fn5ERETVSE1NDVu3bkVcXBycnJwwbdo0LF26VBbToEED7N69G5cuXUK7du3w4YcfYvHixWXOt3HjxtiyZQv27t0LZ2dn/Pjjj9IjWYCnz/27e/cuRo0aBTs7OwwfPhxeXl6YN28eAKBLly6YMGECRowYARMTEyxZsgQAsGHDBowaNQqhoaGwt7fHgAEDcPLkSVhbWwMAOnfujG+//RarVq1C27ZtERERgf/9739l5tqgQQMsXrwYLi4ueO2113D9+nXs3bsXampPy5Dt27fjtddew5tvvglHR0fMnDlT6o1s3749fvrpJ2zduhVOTk746KOPMH/+fNmNHaVp3bo1jhw5gqtXr6Jr165o164dZs+eDQsLizKne9kpRGVO1lOlZWdnQ6lUIisrCwYGBlU785p+cn1VqIWn3xNR/fX48WMkJSWhWbNmKndX5uXLl9GyZUtcvXqVpy+rSVnbV7Uev2sQe/yIiIjquHv37uGXX36BgYGB1PNGVBm8uYOIiKiOGzt2LOLi4rBmzRpoa2vXdjpUj7HwIyIiquN27txZ2ynQS4KneomIiIhUBAs/IiKqd3hfIlUHVdiuWPgREVG9UfTA3qJfvCCqSg8fPgQAaGpq1nIm1YfX+BERUb2hoaEBPT093LlzB5qamtJz4IhehBACDx8+RHp6Oho1alTmL4LUdyz8iIio3lAoFLCwsEBSUhJu3LhR2+nQS6ZRo0YwNzev7TSqFQs/IiKqV7S0tNCiRQue7qUqpamp+VL39BVh4UdERPWOmpqayv1yB1FV4MURRERERCqChR8RERGRimDhR0RERKQiWPgRERERqQgWfkREREQqgoUfERERkYpg4UdERESkIlj4EREREakIFn5EREREKoKFHxEREZGKYOFHREREpCJqtfA7evQofHx8YGlpCYVCgV27dsnGBwQEQKFQyF6dO3eWxeTm5mLy5MkwNjaGvr4+BgwYgJs3b8piMjIy4OfnB6VSCaVSCT8/P2RmZspikpOT4ePjA319fRgbGyM4OLjED4BfuHAB7u7u0NXVhZWVFebPnw8hRJW1BxEREVF1qtXCLycnB23atMHq1aufGdO3b1+kpqZKr71798rGT506FTt37sTWrVtx/PhxPHjwAN7e3igoKJBifH19ER8fj/DwcISHhyM+Ph5+fn7S+IKCAvTv3x85OTk4fvw4tm7diu3btyM0NFSKyc7ORp8+fWBpaYnY2FisWrUKy5Ytw4oVK6qwRYiIiIiqj0ZtLtzLywteXl5lxmhra8Pc3LzUcVlZWfjuu++wefNm9O7dGwCwZcsWWFtbY//+/fD09ERiYiLCw8MRExODTp06AQDWrVsHV1dXXL58Gfb29oiIiMClS5eQkpICS0tLAMDy5csREBCATz75BAYGBggLC8Pjx4+xceNGaGtrw8nJCVeuXMGKFSsQEhIChUJRhS1DREREVPXq/DV+hw8fhqmpKezs7BAYGIj09HRpXFxcHPLz8+Hh4SENs7S0hJOTE6KiogAA0dHRUCqVUtEHAJ07d4ZSqZTFODk5SUUfAHh6eiI3NxdxcXFSjLu7O7S1tWUxt27dwvXr15+Zf25uLrKzs2UvIiIiotpQpws/Ly8vhIWF4eDBg1i+fDliY2PRs2dP5ObmAgDS0tKgpaUFQ0ND2XRmZmZIS0uTYkxNTUvM29TUVBZjZmYmG29oaAgtLa0yY4reF8WUZtGiRdK1hUqlEtbW1hVpAiIiIqIqU6unep9nxIgR0v+dnJzg4uICGxsb7NmzB0OGDHnmdEII2anX0k7DVkVM0Y0dZZ3mnTVrFkJCQqT32dnZLP6IiIioVtTpHr/iLCwsYGNjg6tXrwIAzM3NkZeXh4yMDFlcenq61Btnbm6O27dvl5jXnTt3ZDHFe+0yMjKQn59fZkzRaefiPYH/pa2tDQMDA9mLiIiIqDbUq8Lv7t27SElJgYWFBQCgQ4cO0NTURGRkpBSTmpqKhIQEdOnSBQDg6uqKrKwsnDp1Soo5efIksrKyZDEJCQlITU2VYiIiIqCtrY0OHTpIMUePHpU94iUiIgKWlpawtbWttnUmIiIiqiq1Wvg9ePAA8fHxiI+PBwAkJSUhPj4eycnJePDgAaZPn47o6Ghcv34dhw8fho+PD4yNjTF48GAAgFKpxNixYxEaGooDBw7g7NmzePvtt+Hs7Czd5evg4IC+ffsiMDAQMTExiImJQWBgILy9vWFvbw8A8PDwgKOjI/z8/HD27FkcOHAA06dPR2BgoNRD5+vrC21tbQQEBCAhIQE7d+7EwoULeUcvERER1Ru1eo3f6dOn0aNHD+l90bVw/v7+WLNmDS5cuIDvv/8emZmZsLCwQI8ePbBt2zY0bNhQmuazzz6DhoYGhg8fjkePHqFXr17YuHEj1NXVpZiwsDAEBwdLd/8OGDBA9uxAdXV17NmzB0FBQXBzc4Ouri58fX2xbNkyKUapVCIyMhITJ06Ei4sLDA0NERISIrt+j4iIiKguUwj+9ESNys7OhlKpRFZWVtVf77d7StXOryb4fF7bGRARET1XtR6/a1C9usaPiIiIiCqPhR8RERGRimDhR0RERKQiWPgRERERqQgWfkREREQqgoUfERERkYpg4UdERESkIlj4EREREakIFn5EREREKoKFHxEREZGKYOFHREREpCJY+BERERGpCBZ+RERERCqChR8RERGRimDhR0RERKQiWPgRERERqQgWfkREREQqgoUfERERkYpg4UdERESkIlj4EREREakIFn5EREREKoKFHxEREZGKYOFHREREpCJY+BERERGpCBZ+RERERCqChR8RERGRimDhR0RERKQiWPgRERERqQgWfkREREQqgoUfERERkYpg4UdERESkIlj4EREREakIFn5EREREKoKFHxEREZGKYOFHREREpCJY+BERERGpCBZ+RERERCqChR8RERGRimDhR0RERKQiarXwO3r0KHx8fGBpaQmFQoFdu3ZJ4/Lz8/Hee+/B2dkZ+vr6sLS0xKhRo3Dr1i3ZPLp37w6FQiF7jRw5UhaTkZEBPz8/KJVKKJVK+Pn5ITMzUxaTnJwMHx8f6Ovrw9jYGMHBwcjLy5PFXLhwAe7u7tDV1YWVlRXmz58PIUSVtgkRERFRdanVwi8nJwdt2rTB6tWrS4x7+PAhzpw5g9mzZ+PMmTPYsWMHrly5ggEDBpSIDQwMRGpqqvT6+uuvZeN9fX0RHx+P8PBwhIeHIz4+Hn5+ftL4goIC9O/fHzk5OTh+/Di2bt2K7du3IzQ0VIrJzs5Gnz59YGlpidjYWKxatQrLli3DihUrqrBFiIiIiKqPRm0u3MvLC15eXqWOUyqViIyMlA1btWoVOnbsiOTkZDRt2lQarqenB3Nz81Lnk5iYiPDwcMTExKBTp04AgHXr1sHV1RWXL1+Gvb09IiIicOnSJaSkpMDS0hIAsHz5cgQEBOCTTz6BgYEBwsLC8PjxY2zcuBHa2tpwcnLClStXsGLFCoSEhEChUFRFkxARERFVm3p1jV9WVhYUCgUaNWokGx4WFgZjY2O0atUK06dPx/3796Vx0dHRUCqVUtEHAJ07d4ZSqURUVJQU4+TkJBV9AODp6Ync3FzExcVJMe7u7tDW1pbF3Lp1C9evX39mzrm5ucjOzpa9iIiIiGpDrfb4VcTjx4/x/vvvw9fXFwYGBtLwt956C82aNYO5uTkSEhIwa9YsnDt3TuotTEtLg6mpaYn5mZqaIi0tTYoxMzOTjTc0NISWlpYsxtbWVhZTNE1aWhqaNWtWat6LFi3CvHnzKrfSRERERFWoXhR++fn5GDlyJAoLC/HVV1/JxgUGBkr/d3JyQosWLeDi4oIzZ86gffv2AFDqaVghhGx4ZWKKbuwo6zTvrFmzEBISIr3Pzs6GtbX1M+OJiIiIqkudP9Wbn5+P4cOHIykpCZGRkbLevtK0b98empqauHr1KgDA3Nwct2/fLhF3584dqcfO3Nxc6tkrkpGRgfz8/DJj0tPTAaBEb+F/aWtrw8DAQPYiIiIiqg11uvArKvquXr2K/fv3w8jI6LnTXLx4Efn5+bCwsAAAuLq6IisrC6dOnZJiTp48iaysLHTp0kWKSUhIQGpqqhQTEREBbW1tdOjQQYo5evSo7BEvERERsLS0LHEKmIiIiKguqtXC78GDB4iPj0d8fDwAICkpCfHx8UhOTsaTJ08wbNgwnD59GmFhYSgoKEBaWhrS0tKk4uvatWuYP38+Tp8+jevXr2Pv3r1444030K5dO7i5uQEAHBwc0LdvXwQGBiImJgYxMTEIDAyEt7c37O3tAQAeHh5wdHSEn58fzp49iwMHDmD69OkIDAyUeuh8fX2hra2NgIAAJCQkYOfOnVi4cCHv6CUiIqJ6QyFq8QnEhw8fRo8ePUoM9/f3x9y5c595w8ShQ4fQvXt3pKSk4O2330ZCQgIePHgAa2tr9O/fH3PmzEHjxo2l+Hv37iE4OBi//fYbAGDAgAFYvXq17O7g5ORkBAUF4eDBg9DV1YWvry+WLVsmu4v3woULmDhxIk6dOgVDQ0NMmDABH330UYUKv+zsbCiVSmRlZVX9ad/dU6p2fjXB5/PazoCIiOi5qvX4XYNqtfBTRSz8imHhR0RE9cDLUvjV6Wv8iIiIiKjqsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhG1WvgdPXoUPj4+sLS0hEKhwK5du2TjhRCYO3cuLC0toauri+7du+PixYuymNzcXEyePBnGxsbQ19fHgAEDcPPmTVlMRkYG/Pz8oFQqoVQq4efnh8zMTFlMcnIyfHx8oK+vD2NjYwQHByMvL08Wc+HCBbi7u0NXVxdWVlaYP38+hBBV1h5ERERE1alWC7+cnBy0adMGq1evLnX8kiVLsGLFCqxevRqxsbEwNzdHnz59cP/+fSlm6tSp2LlzJ7Zu3Yrjx4/jwYMH8Pb2RkFBgRTj6+uL+Ph4hIeHIzw8HPHx8fDz85PGFxQUoH///sjJycHx48exdetWbN++HaGhoVJMdnY2+vTpA0tLS8TGxmLVqlVYtmwZVqxYUQ0tQ0RERFT1FKISXVY9e/bEjh070KhRI9nw7OxsDBo0CAcPHqx4IgoFdu7ciUGDBgF42ttnaWmJqVOn4r333gPwtHfPzMwMixcvxjvvvIOsrCyYmJhg8+bNGDFiBADg1q1bsLa2xt69e+Hp6YnExEQ4OjoiJiYGnTp1AgDExMTA1dUVf/75J+zt7bFv3z54e3sjJSUFlpaWAICtW7ciICAA6enpMDAwwJo1azBr1izcvn0b2traAIBPP/0Uq1atws2bN6FQKMq1ntnZ2VAqlcjKyoKBgUGF26lMu6dU7fxqgs/ntZ0BERHRc1Xr8bsGVarH7/DhwyVOgwLA48ePcezYsRdOCgCSkpKQlpYGDw8PaZi2tjbc3d0RFRUFAIiLi0N+fr4sxtLSEk5OTlJMdHQ0lEqlVPQBQOfOnaFUKmUxTk5OUtEHAJ6ensjNzUVcXJwU4+7uLhV9RTG3bt3C9evXq2SdiYiIiKqTRkWCz58/L/3/0qVLSEtLk94XFBQgPDwcVlZWVZJY0bzNzMxkw83MzHDjxg0pRktLC4aGhiViiqZPS0uDqalpifmbmprKYoovx9DQEFpaWrIYW1vbEsspGtesWbNS1yM3Nxe5ubnS++zs7GevNBEREVE1qlDh17ZtWygUCigUCvTs2bPEeF1dXaxatarKkgNQ4hSqEOK5p1WLx5QWXxUxRWfJy8pn0aJFmDdvXpn5EhEREdWECp3qTUpKwrVr1yCEwKlTp5CUlCS9/vnnH2RnZ2PMmDFVkpi5uTkAyHoVASA9PV3qaTM3N0deXh4yMjLKjLl9+3aJ+d+5c0cWU3w5GRkZyM/PLzMmPT0dQMleyf+aNWsWsrKypFdKSkrZK05ERERUTSpU+NnY2MDW1haFhYVwcXGBjY2N9LKwsIC6unqVJdasWTOYm5sjMjJSGpaXl4cjR46gS5cuAIAOHTpAU1NTFpOamoqEhAQpxtXVFVlZWTh16pQUc/LkSWRlZcliEhISkJqaKsVERERAW1sbHTp0kGKOHj0qu7YxIiIClpaWJU4B/5e2tjYMDAxkLyIiIqLaUKFTvf915coVHD58GOnp6SgsLJSN++ijj8o1jwcPHuCvv/6S3iclJSE+Ph6NGzdG06ZNMXXqVCxcuBAtWrRAixYtsHDhQujp6cHX1xcAoFQqMXbsWISGhsLIyAiNGzfG9OnT4ezsjN69ewMAHBwc0LdvXwQGBuLrr78GAIwfPx7e3t6wt7cHAHh4eMDR0RF+fn5YunQp7t27h+nTpyMwMFAq1Hx9fTFv3jwEBATggw8+wNWrV7Fw4UJ89NFH5b6jl4iIiKg2VarwW7duHd59910YGxvD3Ny8xHVw5S38Tp8+jR49ekjvQ0JCAAD+/v7YuHEjZs6ciUePHiEoKAgZGRno1KkTIiIi0LBhQ2mazz77DBoaGhg+fDgePXqEXr16YePGjbLex7CwMAQHB0t3/w4YMED27EB1dXXs2bMHQUFBcHNzg66uLnx9fbFs2TIpRqlUIjIyEhMnToSLiwsMDQ0REhIi5UxERERU11XqOX42NjYICgqSnq9H5cfn+BXD5/gREVE9oNLP8cvIyMAbb7xR1bkQERERUTWqVOH3xhtvICIioqpzISIiIqJqVKlr/F599VXMnj0bMTExcHZ2hqampmx8cHBwlSRHRERERFWnUtf4PetXKoCnN3f8/fffL5TUy4zX+BXDa/yIiKgeeFmu8atUj19SUlJV50FERERE1axS1/gRERERUf1TqR6/5/0s2/r16yuVDBERERFVn0oVfsV/Gzc/Px8JCQnIzMxEz549qyQxIiIiIqpalSr8du7cWWJYYWEhgoKC0Lx58xdOioiIiIiqXpVd46empoZp06bhs88+q6pZEhEREVEVqtKbO65du4YnT55U5SyJiIiIqIpU6lRvSEiI7L0QAqmpqdizZw/8/f2rJDEiIiIiqlqVKvzOnj0re6+mpgYTExMsX778uXf8EhEREVHtqFThd+jQoarOg4iIiIiqWaUKvyJ37tzB5cuXoVAoYGdnBxMTk6rKi4iIiIiqWKVu7sjJycGYMWNgYWGBbt26oWvXrrC0tMTYsWPx8OHDqs6RiIiIiKpApQq/kJAQHDlyBLt370ZmZiYyMzPx66+/4siRIwgNDa3qHImIiIioClTqVO/27dvxyy+/oHv37tKwfv36QVdXF8OHD8eaNWuqKj8iIiIiqiKV6vF7+PAhzMzMSgw3NTXlqV4iIiKiOqpShZ+rqyvmzJmDx48fS8MePXqEefPmwdXVtcqSIyIiIqKqU6lTvStXroSXlxeaNGmCNm3aQKFQID4+Htra2oiIiKjqHImIiIioClSq8HN2dsbVq1exZcsW/PnnnxBCYOTIkXjrrbegq6tb1TkSERERURWoVOG3aNEimJmZITAwUDZ8/fr1uHPnDt57770qSY6IiIiIqk6lrvH7+uuv0bJlyxLDW7VqhbVr175wUkRERERU9SpV+KWlpcHCwqLEcBMTE6Smpr5wUkRERERU9SpV+FlbW+PEiRMlhp84cQKWlpYvnBQRERERVb1KXeM3btw4TJ06Ffn5+ejZsycA4MCBA5g5cyZ/uYOIiIiojqpU4Tdz5kzcu3cPQUFByMvLAwDo6Ojgvffew6xZs6o0QSIiIiKqGpUq/BQKBRYvXozZs2cjMTERurq6aNGiBbS1tas6PyIiIiKqIpUq/Io0aNAAr732WlXlQkRERETVqFI3dxARERFR/cPCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVUecLP1tbWygUihKviRMnAgACAgJKjOvcubNsHrm5uZg8eTKMjY2hr6+PAQMG4ObNm7KYjIwM+Pn5QalUQqlUws/PD5mZmbKY5ORk+Pj4QF9fH8bGxggODpZ+q5iIiIiorqvzhV9sbCxSU1OlV2RkJADgjTfekGL69u0ri9m7d69sHlOnTsXOnTuxdetWHD9+HA8ePIC3tzcKCgqkGF9fX8THxyM8PBzh4eGIj4+Hn5+fNL6goAD9+/dHTk4Ojh8/jq1bt2L79u0IDQ2t5hYgIiIiqhov9Fu9NcHExET2/tNPP8Urr7wCd3d3aZi2tjbMzc1LnT4rKwvfffcdNm/ejN69ewMAtmzZAmtra+zfvx+enp5ITExEeHg4YmJi0KlTJwDAunXr4OrqisuXL8Pe3h4RERG4dOkSUlJSYGlpCQBYvnw5AgIC8Mknn8DAwKA6Vp+IiIioytT5Hr//ysvLw5YtWzBmzBgoFApp+OHDh2Fqago7OzsEBgYiPT1dGhcXF4f8/Hx4eHhIwywtLeHk5ISoqCgAQHR0NJRKpVT0AUDnzp2hVCplMU5OTlLRBwCenp7Izc1FXFzcM3POzc1Fdna27EVERERUG+pV4bdr1y5kZmYiICBAGubl5YWwsDAcPHgQy5cvR2xsLHr27Inc3FwAQFpaGrS0tGBoaCibl5mZGdLS0qQYU1PTEsszNTWVxZiZmcnGGxoaQktLS4opzaJFi6TrBpVKJaytrSu17kREREQvqs6f6v2v7777Dl5eXrJetxEjRkj/d3JygouLC2xsbLBnzx4MGTLkmfMSQsh6Df/7/xeJKW7WrFkICQmR3mdnZ7P4IyIiolpRb3r8bty4gf3792PcuHFlxllYWMDGxgZXr14FAJibmyMvLw8ZGRmyuPT0dKkHz9zcHLdv3y4xrzt37shiivfsZWRkID8/v0RP4H9pa2vDwMBA9iIiIiKqDfWm8NuwYQNMTU3Rv3//MuPu3r2LlJQUWFhYAAA6dOgATU1N6W5gAEhNTUVCQgK6dOkCAHB1dUVWVhZOnTolxZw8eRJZWVmymISEBKSmpkoxERER0NbWRocOHapsPYmIiIiqS70o/AoLC7Fhwwb4+/tDQ+P/z04/ePAA06dPR3R0NK5fv47Dhw/Dx8cHxsbGGDx4MABAqVRi7NixCA0NxYEDB3D27Fm8/fbbcHZ2lu7ydXBwQN++fREYGIiYmBjExMQgMDAQ3t7esLe3BwB4eHjA0dERfn5+OHv2LA4cOIDp06cjMDCQvXhERERUL9SLwm///v1ITk7GmDFjZMPV1dVx4cIFDBw4EHZ2dvD394ednR2io6PRsGFDKe6zzz7DoEGDMHz4cLi5uUFPTw+7d++Gurq6FBMWFgZnZ2d4eHjAw8MDrVu3xubNm2XL2rNnD3R0dODm5obhw4dj0KBBWLZsWfU3ABEREVEVUAghRG0noUqys7OhVCqRlZVV9T2Fu6dU7fxqgs/ntZ0BERHRc1Xr8bsG1YsePyIiIiJ6cSz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFVGnC7+5c+dCoVDIXubm5tJ4IQTmzp0LS0tL6Orqonv37rh48aJsHrm5uZg8eTKMjY2hr6+PAQMG4ObNm7KYjIwM+Pn5QalUQqlUws/PD5mZmbKY5ORk+Pj4QF9fH8bGxggODkZeXl61rTsRERFRVavThR8AtGrVCqmpqdLrwoUL0rglS5ZgxYoVWL16NWJjY2Fubo4+ffrg/v37UszUqVOxc+dObN26FcePH8eDBw/g7e2NgoICKcbX1xfx8fEIDw9HeHg44uPj4efnJ40vKChA//79kZOTg+PHj2Pr1q3Yvn07QkNDa6YRiIiIiKqARm0n8DwaGhqyXr4iQgisXLkSH374IYYMGQIA2LRpE8zMzPDDDz/gnXfeQVZWFr777jts3rwZvXv3BgBs2bIF1tbW2L9/Pzw9PZGYmIjw8HDExMSgU6dOAIB169bB1dUVly9fhr29PSIiInDp0iWkpKTA0tISALB8+XIEBATgk08+gYGBQQ21BhEREVHl1fkev6tXr8LS0hLNmjXDyJEj8ffffwMAkpKSkJaWBg8PDylWW1sb7u7uiIqKAgDExcUhPz9fFmNpaQknJycpJjo6GkqlUir6AKBz585QKpWyGCcnJ6noAwBPT0/k5uYiLi6uzPxzc3ORnZ0texERERHVhjpd+HXq1Anff/89/vjjD6xbtw5paWno0qUL7t69i7S0NACAmZmZbBozMzNpXFpaGrS0tGBoaFhmjKmpaYllm5qaymKKL8fQ0BBaWlpSzLMsWrRIunZQqVTC2tq6Ai1AREREVHXqdOHn5eWFoUOHwtnZGb1798aePXsAPD2lW0ShUMimEUKUGFZc8ZjS4isTU5pZs2YhKytLeqWkpJQZT0RERFRd6nThV5y+vj6cnZ1x9epV6bq/4j1u6enpUu+cubk58vLykJGRUWbM7du3Syzrzp07spjiy8nIyEB+fn6JnsDitLW1YWBgIHsRERER1YZ6Vfjl5uYiMTERFhYWaNasGczNzREZGSmNz8vLw5EjR9ClSxcAQIcOHaCpqSmLSU1NRUJCghTj6uqKrKwsnDp1Soo5efIksrKyZDEJCQlITU2VYiIiIqCtrY0OHTpU6zoTERERVZU6fVfv9OnT4ePjg6ZNmyI9PR0LFixAdnY2/P39oVAoMHXqVCxcuBAtWrRAixYtsHDhQujp6cHX1xcAoFQqMXbsWISGhsLIyAiNGzfG9OnTpVPHAODg4IC+ffsiMDAQX3/9NQBg/Pjx8Pb2hr29PQDAw8MDjo6O8PPzw9KlS3Hv3j1Mnz4dgYGB7MEjIiKieqNOF343b97Em2++iX///RcmJibo3LkzYmJiYGNjAwCYOXMmHj16hKCgIGRkZKBTp06IiIhAw4YNpXl89tln0NDQwPDhw/Ho0SP06tULGzduhLq6uhQTFhaG4OBg6e7fAQMGYPXq1dJ4dXV17NmzB0FBQXBzc4Ouri58fX2xbNmyGmoJIiIiohenEEKI2k5ClWRnZ0OpVCIrK6vqewt3T6na+dUEn89rOwMiIqLnqtbjdw2qV9f4EREREVHlsfAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFaNR2AkT04mbtuFDbKZRp0RDn2k6BiIjAHj8iIiIilcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFaNR2AkR13awdF2o7BSIioirBHj8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBG/uIKJqVx9ukFk0xLm2UyAiqnbs8SMiIiJSEezxo1pVH3qCiIiIXhbs8SMiIiJSESz8iIiIiFQECz8iIiIiFVGnC79FixbhtddeQ8OGDWFqaopBgwbh8uXLspiAgAAoFArZq3PnzrKY3NxcTJ48GcbGxtDX18eAAQNw8+ZNWUxGRgb8/PygVCqhVCrh5+eHzMxMWUxycjJ8fHygr68PY2NjBAcHIy8vr1rWnYiIiKiq1enC78iRI5g4cSJiYmIQGRmJJ0+ewMPDAzk5ObK4vn37IjU1VXrt3btXNn7q1KnYuXMntm7diuPHj+PBgwfw9vZGQUGBFOPr64v4+HiEh4cjPDwc8fHx8PPzk8YXFBSgf//+yMnJwfHjx7F161Zs374doaGh1dsIRERERFWkTt/VGx4eLnu/YcMGmJqaIi4uDt26dZOGa2trw9zcvNR5ZGVl4bvvvsPmzZvRu3dvAMCWLVtgbW2N/fv3w9PTE4mJiQgPD0dMTAw6deoEAFi3bh1cXV1x+fJl2NvbIyIiApcuXUJKSgosLS0BAMuXL0dAQAA++eQTGBgYVEcTEBEREVWZOt3jV1xWVhYAoHHjxrLhhw8fhqmpKezs7BAYGIj09HRpXFxcHPLz8+Hh4SENs7S0hJOTE6KiogAA0dHRUCqVUtEHAJ07d4ZSqZTFODk5SUUfAHh6eiI3NxdxcXHPzDk3NxfZ2dmyFxEREVFtqDeFnxACISEheP311+Hk5CQN9/LyQlhYGA4ePIjly5cjNjYWPXv2RG5uLgAgLS0NWlpaMDQ0lM3PzMwMaWlpUoypqWmJZZqamspizMzMZOMNDQ2hpaUlxZRm0aJF0nWDSqUS1tbWlWsAIiIiohdUp0/1/tekSZNw/vx5HD9+XDZ8xIgR0v+dnJzg4uICGxsb7NmzB0OGDHnm/IQQUCgU0vv//v9FYoqbNWsWQkJCpPfZ2dks/oiIiKhW1Isev8mTJ+O3337DoUOH0KRJkzJjLSwsYGNjg6tXrwIAzM3NkZeXh4yMDFlcenq61INnbm6O27dvl5jXnTt3ZDHFe/YyMjKQn59foifwv7S1tWFgYCB7EREREdWGOl34CSEwadIk7NixAwcPHkSzZs2eO83du3eRkpICCwsLAECHDh2gqamJyMhIKSY1NRUJCQno0qULAMDV1RVZWVk4deqUFHPy5ElkZWXJYhISEpCamirFREREQFtbGx06dKiS9SUiIiKqTnX6VO/EiRPxww8/4Ndff0XDhg2lHjelUgldXV08ePAAc+fOxdChQ2FhYYHr16/jgw8+gLGxMQYPHizFjh07FqGhoTAyMkLjxo0xffp0ODs7S3f5Ojg4oG/fvggMDMTXX38NABg/fjy8vb1hb28PAPDw8ICjoyP8/PywdOlS3Lt3D9OnT0dgYCB78YiIiKheqNM9fmvWrEFWVha6d+8OCwsL6bVt2zYAgLq6Oi5cuICBAwfCzs4O/v7+sLOzQ3R0NBo2bCjN57PPPsOgQYMwfPhwuLm5QU9PD7t374a6uroUExYWBmdnZ3h4eMDDwwOtW7fG5s2bpfHq6urYs2cPdHR04ObmhuHDh2PQoEFYtmxZzTUIERER0QtQCCFEbSehSrKzs6FUKpGVlVX1PYW7p1Tt/GrArPxxtZ0CEQBg0RDn2k6BiOqwaj1+16A63eNHRERERFWHhR8RERGRimDhR0RERKQiWPgRERERqQgWfkREREQqok4/x4+IiOqPWTsu1HYKZeKd20Ts8SMiIiJSGSz8iIiIiFQECz8iIiIiFcFr/IiI6oG6fv0cEdUP7PEjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLPyIiIiIVwcKPiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFaNR2AkREdcGsHRdqOwUiomrHHj8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQf4Ey1atDNJbWdQoXtajKztlMgIiKqFPb4EREREakIFn5EREREKoKneomISCXUh99jXjTEubZToJcce/yIiIiIVAQLPyIiIiIVwcKPiIiISEXwGr9K+Oqrr7B06VKkpqaiVatWWLlyJbp27VrbaRERUT1XH65DrOt4nWTZ2ONXQdu2bcPUqVPx4Ycf4uzZs+jatSu8vLyQnJxc26kRERERlUkhhBC1nUR90qlTJ7Rv3x5r1qyRhjk4OGDQoEFYtGjRc6fPzs6GUqlEVlYWDAwMqja53VOqdn414GTSvdpOQSXwodNEpCqqq8evWo/fNYineisgLy8PcXFxeP/992XDPTw8EBUVVUtZET0ffyGFiIgAFn4V8u+//6KgoABmZmay4WZmZkhLSyt1mtzcXOTm5krvs7KyADz9y6HKPcx9fkwdk/M4r7ZToDqqz18LajsFIqqHsrPXVdN8nx636/uJUhZ+laBQKGTvhRAlhhVZtGgR5s2bV2K4tbV1teRGRESk0t77qVpnf//+fSiVympdRnVi4VcBxsbGUFdXL9G7l56eXqIXsMisWbMQEhIivS8sLMS9e/dgZGT0zGKxIrKzs2FtbY2UlJR6fc1BXcd2rjls65rDtq4ZbOeaU51tLYTA/fv3YWlpWaXzrWks/CpAS0sLHTp0QGRkJAYPHiwNj4yMxMCBA0udRltbG9ra2rJhjRo1qvLcDAwMuEOpAWznmsO2rjls65rBdq451dXW9bmnrwgLvwoKCQmBn58fXFxc4Orqim+++QbJycmYMGFCbadGREREVCYWfhU0YsQI3L17F/Pnz0dqaiqcnJywd+9e2NjY1HZqRERERGVi4VcJQUFBCAoKqu00ADw9lTxnzpwSp5OparGdaw7buuawrWsG27nmsK2fjw9wJiIiIlIR/Mk2IiIiIhXBwo+IiIhIRbDwIyIiIlIRLPyIiIiIVAQLv3rgq6++QrNmzaCjo4MOHTrg2LFjZcYfOXIEHTp0gI6ODpo3b461a9fWUKb1W0XaeceOHejTpw9MTExgYGAAV1dX/PHHHzWYbf1W0W26yIkTJ6ChoYG2bdtWb4IviYq2c25uLj788EPY2NhAW1sbr7zyCtavX19D2dZvFW3rsLAwtGnTBnp6erCwsMDo0aNx9+7dGsq2fjp69Ch8fHxgaWkJhUKBXbt2PXcaHg9LIahO27p1q9DU1BTr1q0Tly5dElOmTBH6+vrixo0bpcb//fffQk9PT0yZMkVcunRJrFu3TmhqaopffvmlhjOvXyrazlOmTBGLFy8Wp06dEleuXBGzZs0Smpqa4syZMzWcef1T0bYukpmZKZo3by48PDxEmzZtaibZeqwy7TxgwADRqVMnERkZKZKSksTJkyfFiRMnajDr+qmibX3s2DGhpqYmPv/8c/H333+LY8eOiVatWolBgwbVcOb1y969e8WHH34otm/fLgCInTt3lhnP42HpWPjVcR07dhQTJkyQDWvZsqV4//33S42fOXOmaNmypWzYO++8Izp37lxtOb4MKtrOpXF0dBTz5s2r6tReOpVt6xEjRoj//e9/Ys6cOSz8yqGi7bxv3z6hVCrF3bt3ayK9l0pF23rp0qWiefPmsmFffPGFaNKkSbXl+LIpT+HH42HpeKq3DsvLy0NcXBw8PDxkwz08PBAVFVXqNNHR0SXiPT09cfr0aeTn51dbrvVZZdq5uMLCQty/fx+NGzeujhRfGpVt6w0bNuDatWuYM2dOdaf4UqhMO//2229wcXHBkiVLYGVlBTs7O0yfPh2PHj2qiZTrrcq0dZcuXXDz5k3s3bsXQgjcvn0bv/zyC/r3718TKasMHg9Lx1/uqMP+/fdfFBQUwMzMTDbczMwMaWlppU6TlpZWavyTJ0/w77//wsLCotryra8q087FLV++HDk5ORg+fHh1pPjSqExbX716Fe+//z6OHTsGDQ3ussqjMu38999/4/jx49DR0cHOnTvx77//IigoCPfu3eN1fmWoTFt36dIFYWFhGDFiBB4/fownT55gwIABWLVqVU2krDJ4PCwde/zqAYVCIXsvhCgx7HnxpQ0nuYq2c5Eff/wRc+fOxbZt22Bqalpd6b1UytvWBQUF8PX1xbx582BnZ1dT6b00KrJNFxYWQqFQICwsDB07dkS/fv2wYsUKbNy4kb1+5VCRtr506RKCg4Px0UcfIS4uDuHh4UhKSsKECRNqIlWVwuNhSfzzuQ4zNjaGurp6ib8a09PTS/wVU8Tc3LzUeA0NDRgZGVVbrvVZZdq5yLZt2zB27Fj8/PPP6N27d3Wm+VKoaFvfv38fp0+fxtmzZzFp0iQATwsUIQQ0NDQQERGBnj171kju9UlltmkLCwtYWVlBqVRKwxwcHCCEwM2bN9GiRYtqzbm+qkxbL1q0CG5ubpgxYwYAoHXr1tDX10fXrl2xYMECle2Jqmo8HpaOPX51mJaWFjp06IDIyEjZ8MjISHTp0qXUaVxdXUvER0REwMXFBZqamtWWa31WmXYGnvb0BQQE4IcffuC1OeVU0bY2MDDAhQsXEB8fL70mTJgAe3t7xMfHo1OnTjWVer1SmW3azc0Nt27dwoMHD6RhV65cgZqaGpo0aVKt+dZnlWnrhw8fQk1NfvhVV1cH8P89UvTieDx8hlq6qYTKqegxAd999524dOmSmDp1qtDX1xfXr18XQgjx/vvvCz8/Pym+6Pb1adOmiUuXLonvvvuOt6+XQ0Xb+YcffhAaGhriyy+/FKmpqdIrMzOztlah3qhoWxfHu3rLp6LtfP/+fdGkSRMxbNgwcfHiRXHkyBHRokULMW7cuNpahXqjom29YcMGoaGhIb766itx7do1cfz4ceHi4iI6duxYW6tQL9y/f1+cPXtWnD17VgAQK1asEGfPnpUem8PjYfmw8KsHvvzyS2FjYyO0tLRE+/btxZEjR6Rx/v7+wt3dXRZ/+PBh0a5dO6GlpSVsbW3FmjVrajjj+qki7ezu7i4AlHj5+/vXfOL1UEW36f9i4Vd+FW3nxMRE0bt3b6GrqyuaNGkiQkJCxMOHD2s46/qpom39xRdfCEdHR6GrqyssLCzEW2+9JW7evFnDWdcvhw4dKnO/y+Nh+SiEYL8yERERkSrgNX5EREREKoKFHxEREZGKYOFHREREpCJY+BERERGpCBZ+RERERCqChR8RERGRimDhR0RERKQiWPgRqShbW1usXLnyheaxceNGNGrUqMyYuXPnom3bttL7gIAADBo0SHrfvXt3TJ069YXyqKwTJ07A2dkZmpqaspxe1MOHDzF06FAYGBhAoVAgMzOzyub9sqrN7YBIlbDwI6JqNX36dBw4cOCZ43fs2IGPP/5Yel8VBWl5hYSEoG3btkhKSsLGjRurbL6bNm3CsWPHEBUVhdTUVCiVyudOc/36dSgUCsTHx1dZHkRExWnUdgJEVLXy8vKgpaVV22lIGjRogAYNGjxzfOPGjWswG7lr165hwoQJaNKkSZXP18HBAU5OTlU63/Kqa9tAXcuHSJWxx4+oDuvevTsmTZqESZMmoVGjRjAyMsL//vc//PeXFm1tbbFgwQIEBARAqVQiMDAQALB9+3a0atUK2trasLW1xfLly0vM//79+/D19UWDBg1gaWmJVatWycavWLECzs7O0NfXh7W1NYKCgvDgwYMS89m1axfs7Oygo6ODPn36ICUlRRpX/FRvaetYdIqve/fuuHHjBqZNmwaFQgGFQoGcnBwYGBjgl19+kU23e/du6Ovr4/79+6XONzc3F8HBwTA1NYWOjg5ef/11xMbGAvj/3rW7d+9izJgxUCgUz+zx27JlC1xcXNCwYUOYm5vD19cX6enpZa7P8uXLcfToUSgUCnTv3h0AoFAosGvXLllso0aNpOU2a9YMANCuXTvZdKWdAh00aBACAgKk98/aBqKiotCtWzfo6urC2toawcHByMnJeWbuRZ/V119/DWtra+jp6eGNN96Qnap+kXxOnDgBd3d36OnpwdDQEJ6ensjIyJCmKywsxMyZM9G4cWOYm5tj7ty5suU8b3u8ceMGfHx8YGhoCH19fbRq1Qp79+6Vxl+6dAn9+vVDgwYNYGZmBj8/P/z777/PbA+ilxELP6I6btOmTdDQ0MDJkyfxxRdf4LPPPsO3334ri1m6dCmcnJwQFxeH2bNnIy4uDsOHD8fIkSNx4cIFzJ07F7Nnzy5R3CxduhStW7fGmTNnMGvWLEybNg2RkZHSeDU1NXzxxRdISEjApk2bcPDgQcycOVM2j4cPH+KTTz7Bpk2bcOLECWRnZ2PkyJGVWtcdO3agSZMmmD9/PlJTU5Gamgp9fX2MHDkSGzZskMVu2LABw4YNQ8OGDUud18yZM7F9+3Zs2rQJZ86cwauvvgpPT0/cu3cP1tbWSE1NhYGBAVauXInU1FSMGDGi1Pnk5eXh448/xrlz57Br1y4kJSXJipzS1iEwMBCurq5ITU3Fjh07yrXup06dAgDs37+/QtMVKb4NXLhwAZ6enhgyZAjOnz+Pbdu24fjx45g0aVKZ8/nrr7/w008/Yffu3QgPD0d8fDwmTpxYoVxKyyc+Ph69evVCq1atEB0djePHj8PHxwcFBQXSNJs2bYK+vj5OnjyJJUuWYP78+RXaHidOnIjc3FwcPXoUFy5cwOLFi6Xe5tTUVLi7u6Nt27Y4ffo0wsPDcfv2bQwfPrzC60ZUrwkiqrPc3d2Fg4ODKCwslIa99957wsHBQXpvY2MjBg0aJJvO19dX9OnTRzZsxowZwtHRUTZd3759ZTEjRowQXl5ez8znp59+EkZGRtL7DRs2CAAiJiZGGpaYmCgAiJMnTwohhJgzZ45o06aNNN7f318MHDhQto5TpkyR5fXZZ5/Jlnvy5Emhrq4u/vnnHyGEEHfu3BGampri8OHDpeb54MEDoampKcLCwqRheXl5wtLSUixZskQaplQqxYYNG565vqU5deqUACDu37//zJgpU6YId3d32TAAYufOnbJh/11+UlKSACDOnj0riynePkIIMXDgQOHv7y+9L20b8PPzE+PHj5cNO3bsmFBTUxOPHj0qNe85c+YIdXV1kZKSIg3bt2+fUFNTE6mpqS+Uz5tvvinc3NxKXW7RfF9//XXZsNdee0289957z5ym+Pbo7Ows5s6dW2rs7NmzhYeHh2xYSkqKACAuX778zGUQvWzY40dUx3Xu3BkKhUJ67+rqiqtXr8p6SlxcXGTTJCYmws3NTTbMzc2txHSurq6yGFdXVyQmJkrvDx06hD59+sDKygoNGzbEqFGjcPfuXdnpQg0NDdnyW7ZsiUaNGsnm86I6duyIVq1a4fvvvwcAbN68GU2bNkW3bt1Kjb927Rry8/NlbaCpqYmOHTtWOK+zZ89i4MCBsLGxQcOGDaVTsMnJyZVbmWpSfBuIi4vDxo0bpWssGzRoAE9PTxQWFiIpKemZ82natKnsmkdXV1cUFhbi8uXLL5RPUY9fWVq3bi17b2FhITut/rztMTg4GAsWLICbmxvmzJmD8+fPS9PGxcXh0KFDsvZo2bIlgKfbC5GqYOFH9BLQ19eXvRdCyIrFomHlUTTdjRs30K9fPzg5OWH79u2Ii4vDl19+CQDIz88vdZrnDXsR48aNk073btiwAaNHj37mMorWtbQ2qEheOTk58PDwQIMGDbBlyxbExsZi586dAJ6eAq4IhUJR4jMo3o6lUVNTK9d0xbeBwsJCvPPOO4iPj5de586dw9WrV/HKK69UKO///lvZfHR1dZ+7LE1NzRLLLiwsBFC+7XHcuHH4+++/4efnhwsXLsDFxUW6brWwsBA+Pj6y9oiPj8fVq1ef+QcE0cuIhR9RHRcTE1PifYsWLaCurv7MaRwdHXH8+HHZsKioKNjZ2cmmK23eRb0gp0+fxpMnT7B8+XJ07twZdnZ2uHXrVollPXnyBKdPn5beX758GZmZmdJ8KkpLS0vWK1nk7bffRnJyMr744gtcvHgR/v7+z5zHq6++Ci0tLVkb5Ofn4/Tp03BwcCh3Ln/++Sf+/fdffPrpp+jatStatmxZ5o0dZTExMUFqaqr0/urVq3j48KH0vuiu1+LrXny6goICJCQkPHd57du3x8WLF/Hqq6+WeJV1h21ycrLsc46Ojoaamhrs7OxeKJ/WrVuX+Vif5ynv9mhtbY0JEyZgx44dCA0Nxbp16wD8f3vY2tqWaI/iRSrRy4yFH1Edl5KSgpCQEFy+fBk//vgjVq1ahSlTppQ5TWhoKA4cOICPP/4YV65cwaZNm7B69WpMnz5dFnfixAksWbIEV65cwZdffomff/5Zmvcrr7yCJ0+eYNWqVfj777+xefNmrF27tsSyNDU1MXnyZJw8eRJnzpzB6NGj0blzZ3Ts2LFS62tra4ujR4/in3/+kd1xaWhoiCFDhmDGjBnw8PAo8xEs+vr6ePfddzFjxgyEh4fj0qVLCAwMxMOHDzF27Nhy59K0aVNoaWlJbfDbb7/JnjlYET179sTq1atx5swZnD59GhMmTJD1cJmamkJXV1e66SArK0uabs+ePdizZw/+/PNPBAUFleuB0O+99x6io6MxceJEqWfrt99+w+TJk8ucTkdHB/7+/jh37hyOHTuG4OBgDB8+HObm5i+Uz6xZsxAbG4ugoCCcP38ef/75J9asWVPuu2rLsz1OnToVf/zxB5KSknDmzBkcPHhQKvQnTpyIe/fu4c0338SpU6fw999/IyIiAmPGjCn1Dw2ilxULP6I6btSoUXj06BE6duyIiRMnYvLkyRg/fnyZ07Rv3x4//fQTtm7dCicnJ3z00UeYP39+ibtRQ0NDERcXh3bt2uHjjz/G8uXL4enpCQBo27YtVqxYgcWLF8PJyQlhYWFYtGhRiWXp6enhvffeg6+vL1xdXaGrq4utW7dWen3nz5+P69ev45VXXoGJiYls3NixY5GXl4cxY8Y8dz6ffvophg4dCj8/P7Rv3x5//fUX/vjjDxgaGpY7FxMTE2zcuBE///wzHB0d8emnn2LZsmUVXicAWL58OaytrdGtWzf4+vpi+vTp0NPTk8ZraGjgiy++wNdffw1LS0sMHDgQADBmzBj4+/tj1KhRcHd3R7NmzdCjR4/nLq9169Y4cuQIrl69iq5du6Jdu3aYPXs2LCwsypzu1VdfxZAhQ9CvXz94eHjAyckJX331lTS+svnY2dkhIiIC586dQ8eOHeHq6opff/0VGhrle5xsebbHgoICTJw4EQ4ODujbty/s7e2l3C0tLXHixAkUFBTA09MTTk5OmDJlCpRKJdTUeCgk1aEQ5b3wh4hqXPfu3dG2bdsa+yWLui4sLAxTpkzBrVu3+EDgajB37lzs2rWLvx5C9BLjL3cQUZ338OFDJCUlYdGiRXjnnXdY9BERVRL7t4mozluyZAnatm0LMzMzzJo1q7bTISKqt3iql4iIiEhFsMePiIiISEWw8CMiIiJSESz8iIiIiFQECz8iIiIiFcHCj4iIiEhFsPAjIiIiUhEs/IiIiIhUBAs/IiIiIhXBwo+IiIhIRfwfGYLAxkFTEjQAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_hist_scores(X_test_segment, score = \"score\", score_adjusted = \"score_adjusted\", type_of_activity = type_of_activity)" ] }, { "cell_type": "code", "execution_count": 40, "id": "add631d7-0757-45a5-bb5b-f7f4b4baa961", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "projet-bdc2324-team1/Output_expected_CA/sport/\n" ] } ], "source": [ "# define path so save graphics\n", "\n", "# define type of activity \n", "type_of_activity = \"sport\"\n", "PATH = f\"projet-bdc2324-team1/Output_expected_CA/{type_of_activity}/\"\n", "print(PATH)" ] }, { "cell_type": "code", "execution_count": 68, "id": "3a5b5bd9-e033-4436-8c56-bf5fb61df87f", "metadata": {}, "outputs": [], "source": [ "# export png \n", "\n", "# plot adjusted scores and save (to be tested)\n", "plot_hist_scores(X_test_segment, score = \"score\", score_adjusted = \"score_adjusted\", type_of_activity = type_of_activity)\n", "\n", "image_buffer = io.BytesIO()\n", "plt.savefig(image_buffer, format='png')\n", "image_buffer.seek(0)\n", "file_name = \"hist_score_adjusted_\"\n", "FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".png\"\n", "with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n", " s3_file.write(image_buffer.read())\n", "plt.close()" ] }, { "cell_type": "markdown", "id": "e6fae260-fab8-4f51-90dc-9b6d7314c77b", "metadata": {}, "source": [ "## Compute number of tickets and CA by segment with the recalibrated score" ] }, { "cell_type": "code", "execution_count": 298, "id": "90c4c2b5-0ede-4001-889f-749cfbd9df04", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
quartilescore (%)score adjusted (%)has purchased (%)
0117.780.960.67
1236.122.492.83
2363.147.297.04
3486.0329.2129.20
\n", "
" ], "text/plain": [ " quartile score (%) score adjusted (%) has purchased (%)\n", "0 1 17.78 0.96 0.67\n", "1 2 36.12 2.49 2.83\n", "2 3 63.14 7.29 7.04\n", "3 4 86.03 29.21 29.20" ] }, "execution_count": 298, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_table_adjusted_scores = (100 * X_test_segment.groupby(\"quartile\")[[\"score\",\"score_adjusted\", \"has_purchased\"]].mean()).round(2).reset_index()\n", "X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f\"{col.replace('_', ' ')} (%)\" for col in X_test_table_adjusted_scores.columns if col in [\"score\",\"score_adjusted\", \"has_purchased\"]})\n", "X_test_table_adjusted_scores" ] }, { "cell_type": "code", "execution_count": 162, "id": "d0b8740c-cf48-4a3e-83cb-23d95059f62f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\\\begin{tabular}{lrrr}\\n\\\\toprule\\nquartile & score (%) & score adjusted (%) & has purchased (%) \\\\\\\\\\n\\\\midrule\\n1 & 13.250000 & 2.510000 & 1.570000 \\\\\\\\\\n2 & 33.890000 & 8.000000 & 9.850000 \\\\\\\\\\n3 & 63.060000 & 22.580000 & 21.470000 \\\\\\\\\\n4 & 90.520000 & 66.200000 & 65.010000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_table_adjusted_scores.to_latex(index=False)" ] }, { "cell_type": "code", "execution_count": 43, "id": "d6a04d3e-c454-43e4-ae4c-0746e928575b", "metadata": {}, "outputs": [], "source": [ "# comparison between score and adjusted score - export csv associated\n", "\n", "file_name = \"table_adjusted_score_\"\n", "FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n", "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", " X_test_table_adjusted_scores.to_csv(file_out, index = False)" ] }, { "cell_type": "code", "execution_count": 106, "id": "a974589f-7952-4db2-bebf-7b69c6b09372", "metadata": {}, "outputs": [], "source": [ "def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n", " \n", " duration_ratio = duration_ref/duration_projection\n", "\n", " df_output = df\n", "\n", " df_output.loc[:,\"nb_tickets_projected\"] = df_output.loc[:,nb_tickets] / duration_ratio\n", " df_output.loc[:,\"total_amount_projected\"] = df_output.loc[:,total_amount] / duration_ratio\n", " \n", " df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n", " df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n", "\n", " df_output.loc[:,\"pace_purchase\"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)\n", " \n", " return df_output\n" ] }, { "cell_type": "code", "execution_count": 107, "id": "dd8a52e1-d06e-4790-8687-8e58e3e6b84e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1080/3982240549.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_output.loc[:,\"nb_tickets_projected\"] = df_output.loc[:,nb_tickets] / duration_ratio\n", "/tmp/ipykernel_1080/3982240549.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_output.loc[:,\"total_amount_projected\"] = df_output.loc[:,total_amount] / duration_ratio\n", "/tmp/ipykernel_1080/3982240549.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n", "/tmp/ipykernel_1080/3982240549.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n", "/tmp/ipykernel_1080/3982240549.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df_output.loc[:,\"pace_purchase\"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelity...has_purchasedhas_purchased_estimscorequartilescore_adjustednb_tickets_projectedtotal_amount_projectednb_tickets_expectedtotal_amount_expectedpace_purchase
04.01.0100.001.00.05.1771875.1771870.0000000.01...0.00.00.00606610.0017132.82352970.5882350.0048360.12089017.0
11.01.055.001.00.0426.265613426.2656130.0000000.02...1.00.00.28884720.1024770.70588238.8235290.0723373.97852017.0
217.01.080.001.00.0436.033437436.0334370.0000000.02...0.00.00.10326410.03135612.00000056.4705880.3762741.77070117.0
34.01.0120.001.00.05.1964125.1964120.0000000.01...0.00.00.00892810.0025262.82352984.7058820.0071320.21396817.0
434.02.0416.001.00.0478.693148115.631470363.0616780.04...1.01.00.99280940.97488024.000000293.64705923.397112286.2705418.5
..................................................................
960911.01.067.311.01.0278.442257278.4422570.0000001.02...1.00.00.35176220.1323530.70588247.5129410.0934266.28847817.0
960921.01.061.411.01.0189.207373189.2073730.0000001.01...0.01.00.56781430.2697140.70588243.3482350.19038711.69164517.0
960930.00.00.000.00.0550.000000550.000000-1.0000000.01...0.00.00.00465210.0013120.0000000.0000000.0000000.000000NaN
960941.01.079.431.01.0279.312905279.3129050.0000001.01...0.00.00.29304220.1043620.70588256.0682350.0736685.85142017.0
960950.00.00.000.00.0550.000000550.000000-1.0000000.02...0.01.00.78785240.5107530.0000000.0000000.0000000.000000NaN
\n", "

96096 rows × 27 columns

\n", "
" ], "text/plain": [ " nb_tickets nb_purchases total_amount nb_suppliers \\\n", "0 4.0 1.0 100.00 1.0 \n", "1 1.0 1.0 55.00 1.0 \n", "2 17.0 1.0 80.00 1.0 \n", "3 4.0 1.0 120.00 1.0 \n", "4 34.0 2.0 416.00 1.0 \n", "... ... ... ... ... \n", "96091 1.0 1.0 67.31 1.0 \n", "96092 1.0 1.0 61.41 1.0 \n", "96093 0.0 0.0 0.00 0.0 \n", "96094 1.0 1.0 79.43 1.0 \n", "96095 0.0 0.0 0.00 0.0 \n", "\n", " vente_internet_max purchase_date_min purchase_date_max \\\n", "0 0.0 5.177187 5.177187 \n", "1 0.0 426.265613 426.265613 \n", "2 0.0 436.033437 436.033437 \n", "3 0.0 5.196412 5.196412 \n", "4 0.0 478.693148 115.631470 \n", "... ... ... ... \n", "96091 1.0 278.442257 278.442257 \n", "96092 1.0 189.207373 189.207373 \n", "96093 0.0 550.000000 550.000000 \n", "96094 1.0 279.312905 279.312905 \n", "96095 0.0 550.000000 550.000000 \n", "\n", " time_between_purchase nb_tickets_internet fidelity ... \\\n", "0 0.000000 0.0 1 ... \n", "1 0.000000 0.0 2 ... \n", "2 0.000000 0.0 2 ... \n", "3 0.000000 0.0 1 ... \n", "4 363.061678 0.0 4 ... \n", "... ... ... ... ... \n", "96091 0.000000 1.0 2 ... \n", "96092 0.000000 1.0 1 ... \n", "96093 -1.000000 0.0 1 ... \n", "96094 0.000000 1.0 1 ... \n", "96095 -1.000000 0.0 2 ... \n", "\n", " has_purchased has_purchased_estim score quartile score_adjusted \\\n", "0 0.0 0.0 0.006066 1 0.001713 \n", "1 1.0 0.0 0.288847 2 0.102477 \n", "2 0.0 0.0 0.103264 1 0.031356 \n", "3 0.0 0.0 0.008928 1 0.002526 \n", "4 1.0 1.0 0.992809 4 0.974880 \n", "... ... ... ... ... ... \n", "96091 1.0 0.0 0.351762 2 0.132353 \n", "96092 0.0 1.0 0.567814 3 0.269714 \n", "96093 0.0 0.0 0.004652 1 0.001312 \n", "96094 0.0 0.0 0.293042 2 0.104362 \n", "96095 0.0 1.0 0.787852 4 0.510753 \n", "\n", " nb_tickets_projected total_amount_projected nb_tickets_expected \\\n", "0 2.823529 70.588235 0.004836 \n", "1 0.705882 38.823529 0.072337 \n", "2 12.000000 56.470588 0.376274 \n", "3 2.823529 84.705882 0.007132 \n", "4 24.000000 293.647059 23.397112 \n", "... ... ... ... \n", "96091 0.705882 47.512941 0.093426 \n", "96092 0.705882 43.348235 0.190387 \n", "96093 0.000000 0.000000 0.000000 \n", "96094 0.705882 56.068235 0.073668 \n", "96095 0.000000 0.000000 0.000000 \n", "\n", " total_amount_expected pace_purchase \n", "0 0.120890 17.0 \n", "1 3.978520 17.0 \n", "2 1.770701 17.0 \n", "3 0.213968 17.0 \n", "4 286.270541 8.5 \n", "... ... ... \n", "96091 6.288478 17.0 \n", "96092 11.691645 17.0 \n", "96093 0.000000 NaN \n", "96094 5.851420 17.0 \n", "96095 0.000000 NaN \n", "\n", "[96096 rows x 27 columns]" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_segment = project_tickets_CA (X_test_segment, \"nb_purchases\", \"nb_tickets\", \"total_amount\", \"score_adjusted\", \n", " duration_ref=17, duration_projection=12)\n", "X_test_segment" ] }, { "cell_type": "code", "execution_count": 108, "id": "cb66a8ea-65f7-460f-b3fc-ba76a3b91faa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "quartile\n", "1 15.578346\n", "2 15.403993\n", "3 12.415869\n", "4 5.983541\n", "Name: pace_purchase, dtype: float64" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_segment.groupby(\"quartile\")[\"pace_purchase\"].mean()" ] }, { "cell_type": "code", "execution_count": 109, "id": "f58f9151-2f91-45df-abb7-1ddcf0652adc", "metadata": {}, "outputs": [], "source": [ "# generalization with a function\n", "\n", "def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount, pace_purchase,\n", " duration_ref=17, duration_projection=12) :\n", " \n", " # compute nb tickets estimated and total amount expected\n", " df_expected_CA = df.groupby(segment)[[nb_tickets_expected, total_amount_expected]].sum().reset_index()\n", " \n", " # number of customers by segment\n", " df_expected_CA.insert(1, \"size\", df.groupby(segment).size().values)\n", " \n", " # size in percent of all customers\n", " df_expected_CA.insert(2, \"size_perct\", 100 * df_expected_CA[\"size\"]/df_expected_CA[\"size\"].sum())\n", " \n", " # compute share of CA recovered\n", " duration_ratio=duration_ref/duration_projection\n", " \n", " df_expected_CA[\"revenue_recovered_perct\"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \\\n", " df.groupby(segment)[total_amount].sum().values\n", "\n", " df_drop_null_pace = df.dropna(subset=[pace_purchase])\n", " df_expected_CA[\"pace_purchase\"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values\n", " \n", " return df_expected_CA" ] }, { "cell_type": "code", "execution_count": 110, "id": "c8df6c80-43e8-4f00-9cd3-eb9022744313", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
quartilesizesize_perctnb_tickets_expectedtotal_amount_expectedrevenue_recovered_perctpace_purchase
015356555.741067.9127626.393.4215.58
122460725.614748.18169941.7215.2715.40
23971610.1111629.33309933.7932.4112.42
3482088.54215729.8610042427.5089.695.98
\n", "
" ], "text/plain": [ " quartile size size_perct nb_tickets_expected total_amount_expected \\\n", "0 1 53565 55.74 1067.91 27626.39 \n", "1 2 24607 25.61 4748.18 169941.72 \n", "2 3 9716 10.11 11629.33 309933.79 \n", "3 4 8208 8.54 215729.86 10042427.50 \n", "\n", " revenue_recovered_perct pace_purchase \n", "0 3.42 15.58 \n", "1 15.27 15.40 \n", "2 32.41 12.42 \n", "3 89.69 5.98 " ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", \n", " nb_tickets_expected=\"nb_tickets_expected\", total_amount_expected=\"total_amount_expected\", \n", " total_amount=\"total_amount\", pace_purchase=\"pace_purchase\"),2)\n", "\n", "X_test_expected_CA" ] }, { "cell_type": "code", "execution_count": 64, "id": "ac706ed7-defa-4df1-82e1-06f12fc1b6ad", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\\\begin{tabular}{lrrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) & pace purchase \\\\\\\\\\n\\\\midrule\\n1 & 53626 & 35.310000 & 398.260000 & 13949.330000 & 2.350000 & 16.480000 \\\\\\\\\\n2 & 55974 & 36.860000 & 3113.770000 & 101639.450000 & 6.240000 & 16.470000 \\\\\\\\\\n3 & 30435 & 20.040000 & 6214.350000 & 208267.220000 & 14.270000 & 15.710000 \\\\\\\\\\n4 & 11839 & 7.800000 & 72929.460000 & 1835702.430000 & 75.380000 & 11.480000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Création du dictionnaire de mapping pour les noms de colonnes\n", "mapping_dict = {col: col.replace(\"perct\", \"(%)\").replace(\"_\", \" \") for col in X_test_expected_CA.columns}\n", "\n", "X_test_expected_CA.rename(columns=mapping_dict).to_latex(index=False)" ] }, { "cell_type": "code", "execution_count": 122, "id": "771da0cf-c49f-4e7e-b52f-ebcfb0fb2df3", "metadata": {}, "outputs": [], "source": [ "# export summary table to the MinIO storage\n", "\n", "file_name = \"table_expected_CA_\"\n", "FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n", "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", " X_test_expected_CA.to_csv(file_out, index = False)" ] }, { "cell_type": "code", "execution_count": 53, "id": "c805dc10-4d07-4f7d-a677-5461a92845d7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'projet-bdc2324-team1/Output_expected_CA/musique/table_expected_CA_musique.csv'" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "PATH = f\"projet-bdc2324-team1/Output_expected_CA/{type_of_activity}/\"\n", "file_name = \"table_expected_CA_\"\n", "FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n", "FILE_PATH_OUT_S3" ] }, { "cell_type": "markdown", "id": "e35ccfff-1845-41f0-9bde-f09b09b67877", "metadata": {}, "source": [ "## Test : vizu tables saved" ] }, { "cell_type": "code", "execution_count": 66, "id": "4e9e88e4-ea10-41f4-9bf1-20b55269a20d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
quartilescore (%)score adjusted (%)has purchased (%)
0113.252.511.57
1233.898.009.85
2363.0622.5821.47
3490.5266.2065.01
\n", "
" ], "text/plain": [ " quartile score (%) score adjusted (%) has purchased (%)\n", "0 1 13.25 2.51 1.57\n", "1 2 33.89 8.00 9.85\n", "2 3 63.06 22.58 21.47\n", "3 4 90.52 66.20 65.01" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = 'projet-bdc2324-team1/Output_expected_CA/sport/table_adjusted_scoresport.csv'\n", "\n", "with fs.open( path, mode=\"rb\") as file_in:\n", " df = pd.read_csv(file_in, sep=\",\")\n", "df" ] }, { "cell_type": "markdown", "id": "9c471bdd-25c2-420a-a8a1-3add9f003cbc", "metadata": {}, "source": [ "## Just to try, same computation with score instead of score adjusted\n", "\n", "seems overestimated : if only 14% of customers come back, how can we recover 22% of the revenue from the segment that is least likely to buy ?? ..." ] }, { "cell_type": "code", "execution_count": 80, "id": "53684a24-1809-465f-8e21-b9295e34582a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
quartilesizesize_perctnb_tickets_expectedtotal_amount_expectedperct_revenue_recovered
013741038.93419.769245.0821.71
122951730.7211549.06296522.0239.24
232013720.9629997.85954751.9163.34
3490329.40244655.8210736011.9597.72
\n", "
" ], "text/plain": [ " quartile size size_perct nb_tickets_expected total_amount_expected \\\n", "0 1 37410 38.93 419.76 9245.08 \n", "1 2 29517 30.72 11549.06 296522.02 \n", "2 3 20137 20.96 29997.85 954751.91 \n", "3 4 9032 9.40 244655.82 10736011.95 \n", "\n", " perct_revenue_recovered \n", "0 21.71 \n", "1 39.24 \n", "2 63.34 \n", "3 97.72 " ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_segment_bis = project_tickets_CA (X_test_segment, \"nb_tickets\", \"total_amount\", \"score\", duration_ref=1.5, duration_projection=1)\n", "\n", "X_test_expected_CA_bis = round(summary_expected_CA(df=X_test_segment_bis, segment=\"quartile\", nb_tickets_expected=\"nb_tickets_expected\", \n", " total_amount_expected=\"total_amount_expected\", total_amount=\"total_amount\"),2)\n", "\n", "X_test_expected_CA_bis" ] }, { "cell_type": "code", "execution_count": 81, "id": "7dc66d1e-da03-4513-96e4-d9a43ac0a2c8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "overall share of revenue recovered : 90.26 %\n" ] } ], "source": [ "print(\"overall share of revenue recovered : \", round(100 * duration_ratio * X_test_expected_CA_bis[\"total_amount_expected\"].sum() / \\\n", "X_test_segment_bis[\"total_amount\"].sum(),2), \"%\")" ] }, { "cell_type": "markdown", "id": "673f2969-7b9a-44c1-abf5-5679fca877ce", "metadata": {}, "source": [ "## Last pieces of analysis" ] }, { "cell_type": "code", "execution_count": 161, "id": "2365bb13-0f3f-49d5-bf91-52c92abebcee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "overall share of revenue recovered : 77.64%\n" ] } ], "source": [ "# global revenue recovered\n", "global_revenue_recovered = round(100 * duration_ratio * X_test_expected_CA[\"total_amount_expected\"].sum() / \\\n", "X_test_segment[\"total_amount\"].sum(),2)\n", "print(f\"overall share of revenue recovered : {global_revenue_recovered}%\")" ] }, { "cell_type": "code", "execution_count": 163, "id": "16b17f35-57dd-459a-8989-129143dc0952", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0.018093\n", "1 0.721519\n", "2 3.336101\n", "3 95.924287\n", "Name: total_amount_expected, dtype: float64" ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "100 * X_test_expected_CA[\"total_amount_expected\"]/X_test_expected_CA[\"total_amount_expected\"].sum()" ] }, { "cell_type": "code", "execution_count": 166, "id": "dee4a200-eefe-4377-8e80-59ad33edd3c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "quartile\n", "1 0.320407\n", "2 5.685020\n", "3 11.339715\n", "4 82.654858\n", "Name: total_amount, dtype: float64" ] }, "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# le segment 4 représente 83% du CA actuel et 96% du CA lié aux anciens clients pour l'année prochaine\n", "100 * X_test_segment.groupby(\"quartile\")[\"total_amount\"].sum()/X_test_segment[\"total_amount\"].sum()" ] }, { "cell_type": "code", "execution_count": 177, "id": "c1e6f020-ef18-40b4-bfc1-19f98cb2796e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 96096.000000\n", "mean 207.475735\n", "std 4720.046248\n", "min -48831.800000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 60.000000\n", "max 624890.000000\n", "Name: total_amount, dtype: float64" ] }, "execution_count": 177, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test_segment[\"total_amount\"].describe() # total amount négatif ???\n" ] }, { "cell_type": "code", "execution_count": 184, "id": "d301a50e-7c68-40f0-9245-a4eea64c387b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 -4.883180e+04\n", "1 -6.483180e+04\n", "2 -7.683860e+04\n", "3 -8.683860e+04\n", "4 -9.683860e+04\n", " ... \n", "96091 1.802247e+07\n", "96092 1.839238e+07\n", "96093 1.877219e+07\n", "96094 1.931270e+07\n", "96095 1.993759e+07\n", "Name: total_amount, Length: 96096, dtype: float64" ] }, "execution_count": 184, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.cumsum(X_test_segment[\"total_amount\"].sort_values()).reset_index()[\"total_amount\"]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }