2297 lines
98 KiB
Plaintext
2297 lines
98 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "314bf34b-1f6d-4a99-8f82-aa71ebacdabc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import warnings\n",
|
||
"from datetime import date, timedelta, datetime\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"# Load the shared KPI helpers into the notebook namespace; the context manager\n",
"# closes the file as soon as it has been read.\n",
"with open('../0_KPI_functions.py') as kpi_functions_file:\n",
"    exec(kpi_functions_file.read())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "a276822a-c389-429e-b249-8a9e47758bfc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Ignore warning\n",
|
||
"warnings.filterwarnings('ignore')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "f62b996c-4e17-40ea-83ba-f0cb60be7671",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/1',\n",
|
||
" 'bdc2324-data/10',\n",
|
||
" 'bdc2324-data/101',\n",
|
||
" 'bdc2324-data/11',\n",
|
||
" 'bdc2324-data/12',\n",
|
||
" 'bdc2324-data/13',\n",
|
||
" 'bdc2324-data/14',\n",
|
||
" 'bdc2324-data/2',\n",
|
||
" 'bdc2324-data/3',\n",
|
||
" 'bdc2324-data/4',\n",
|
||
" 'bdc2324-data/5',\n",
|
||
" 'bdc2324-data/6',\n",
|
||
" 'bdc2324-data/7',\n",
|
||
" 'bdc2324-data/8',\n",
|
||
" 'bdc2324-data/9']"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"BUCKET = \"bdc2324-data\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2c829aa8-2006-4e72-889b-7096dd55718b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Look at the time coverage of each company and compute the period covered by all of them"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"id": "e86864b7-4852-449a-8680-638559d56080",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"sport = ['5', '6', '7', '8', '9']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"id": "7634ec57-4891-4684-8638-1e1643baca28",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def display_covering_time(df, company, datecover):\n",
"    \"\"\"\n",
"    Record and print the purchase-date coverage of one company.\n",
"\n",
"    Args:\n",
"        df: DataFrame whose 'purchase_date' column holds datetime values.\n",
"        company: key under which the coverage is stored in `datecover`.\n",
"        datecover: dict mapping a company to its list of covered days; updated in place.\n",
"\n",
"    Returns:\n",
"        `datecover`, where `datecover[company]` holds one naive midnight datetime\n",
"        per calendar day from the first to the last purchase date, both included.\n",
"    \"\"\"\n",
"    min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
"    max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
"    # Parsing the formatted strings back drops the time of day, so companies\n",
"    # are compared on whole calendar days.\n",
"    first_day = datetime.strptime(min_date, \"%Y-%m-%d\")\n",
"    last_day = datetime.strptime(max_date, \"%Y-%m-%d\")\n",
"    # +1: range() excludes its upper bound, which used to drop the last purchase day\n",
"    datecover[company] = [first_day + timedelta(days=x) for x in range((last_day - first_day).days + 1)]\n",
"    print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
"    return datecover"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"id": "53c83f51-822c-4e05-8c7c-89aa327603c6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def compute_time_intersection(datecover):\n",
"    \"\"\"\n",
"    Return the days covered by every company, as sorted 'YYYY-MM-DD' strings.\n",
"\n",
"    Args:\n",
"        datecover: dict mapping a company to an iterable of hashable objects\n",
"            exposing `strftime` (e.g. datetime).\n",
"\n",
"    Returns:\n",
"        Sorted list of the dates found in every company's coverage;\n",
"        an empty list when `datecover` is empty.\n",
"    \"\"\"\n",
"    if not datecover:\n",
"        # set.intersection() raises TypeError when it is given no set at all\n",
"        return []\n",
"    common_days = set.intersection(*(set(timestamps) for timestamps in datecover.values()))\n",
"    return sorted(day.strftime(\"%Y-%m-%d\") for day in common_days)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"id": "eec152de-078e-44c4-ad6e-74ae6ba5c65a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def df_coverage_modelization(sport, coverage_train = 0.7):\n",
"    \"\"\"\n",
"    Compute the dates used to build the train and test datasets.\n",
"\n",
"    Args:\n",
"        sport: iterable of company identifiers (e.g. ['5', '6']).\n",
"        coverage_train: fraction of the common coverage period, counted in days,\n",
"            that lies before `end_of_features`.\n",
"\n",
"    Returns:\n",
"        (start_date, end_of_features, final_date) as 'YYYY-MM-DD' strings: the first\n",
"        common day, the day located at `coverage_train` of the common period and\n",
"        the last common day.\n",
"    \"\"\"\n",
"    datecover = {}\n",
"    for company in sport:\n",
"        df_products_purchased_reduced = display_databases(company, file_name = \"products_purchased_reduced\",\n",
"                                                          datetime_col = ['purchase_date'])\n",
"        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
"    dt_coverage = compute_time_intersection(datecover)\n",
"    start_date = dt_coverage[0]\n",
"    # Honour the caller's ratio (a hard-coded 0.7 used to be applied whatever the argument);\n",
"    # the index is clamped so that coverage_train = 1 maps to the last common day.\n",
"    end_index = min(int(coverage_train * len(dt_coverage)), len(dt_coverage) - 1)\n",
"    end_of_features = dt_coverage[end_index]\n",
"    final_date = dt_coverage[-1]\n",
"    return start_date, end_of_features, final_date"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"id": "348f246a-bc2d-4bbc-ba05-aa825da15a69",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
|
||
"Couverture Company 5 : 2019-04-15 - 2023-11-09\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n",
|
||
"Couverture Company 6 : 2018-06-28 - 2023-11-08\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_7/products_purchased_reduced.csv\n",
|
||
"Couverture Company 7 : 2015-02-10 - 2023-11-08\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_8/products_purchased_reduced.csv\n",
|
||
"Couverture Company 8 : 2010-09-28 - 2023-11-08\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_9/products_purchased_reduced.csv\n",
|
||
"Couverture Company 9 : 2014-09-22 - 2023-10-24\n",
|
||
"dict_keys(['5', '6', '7', '8', '9'])\n",
|
||
"2019-04-15 2022-06-15 2023-10-23\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"start_date, end_of_features, final_date = df_coverage_modelization(sport, coverage_train = 0.7)\n",
|
||
"print(start_date, end_of_features, final_date )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "34ddc267-4daa-4926-9d54-5b13d4212eaa",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Look at the databases common to all sport companies"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"id": "389387fa-2046-4811-b8dd-6d524e91fe2e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['bdc2324-data/5',\n",
|
||
" 'bdc2324-data/6',\n",
|
||
" 'bdc2324-data/7',\n",
|
||
" 'bdc2324-data/8',\n",
|
||
" 'bdc2324-data/9']"
|
||
]
|
||
},
|
||
"execution_count": 101,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Keep only the sport companies. The last path component is compared exactly:\n",
"# a suffix test would also accept e.g. 'bdc2324-data/15' for company '5'.\n",
"companies = [company for company in fs.ls(BUCKET) if company.split('/')[-1] in sport]\n",
"companies"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 107,
|
||
"id": "895fc2b3-c768-454d-bedb-54994e4d211a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Number of databases : 30\n",
|
||
"Number of common databases : 23\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# File names available for each sport company, with the company id removed from them\n",
"companies_database = {}\n",
"for company in companies:\n",
"    company_id = company.split('/')[-1]\n",
"    companies_database[company_id] = [file.split('/')[-1].replace(company_id, '') for file in fs.ls(company)]\n",
"\n",
"# Reference list: the company exposing the largest number of files\n",
"all_database = max(companies_database.values(), key=len)\n",
"print(\"Number of databases : \",len(all_database))\n",
"\n",
"# Keep the file names present for every company\n",
"data_in_common = set(all_database)\n",
"for file_names in companies_database.values():\n",
"    data_in_common = data_in_common & set(file_names)\n",
"\n",
"print(\"Number of common databases : \",len(data_in_common))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 121,
|
||
"id": "0c06517d-f5b7-4104-94fa-0e3f843c5881",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'campaign_stats.csv',\n",
|
||
" 'campaigns.csv',\n",
|
||
" 'categories.csv',\n",
|
||
" 'countries.csv',\n",
|
||
" 'currencies.csv',\n",
|
||
" 'customer_target_mappings.csv',\n",
|
||
" 'customersplus.csv',\n",
|
||
" 'event_types.csv',\n",
|
||
" 'events.csv',\n",
|
||
" 'facilities.csv',\n",
|
||
" 'link_stats.csv',\n",
|
||
" 'pricing_formulas.csv',\n",
|
||
" 'product_packs.csv',\n",
|
||
" 'products.csv',\n",
|
||
" 'products_groups.csv',\n",
|
||
" 'purchases.csv',\n",
|
||
" 'representation_category_capacities.csv',\n",
|
||
" 'representations.csv',\n",
|
||
" 'seasons.csv',\n",
|
||
" 'suppliers.csv',\n",
|
||
" 'target_types.csv',\n",
|
||
" 'targets.csv',\n",
|
||
" 'tickets.csv'}"
|
||
]
|
||
},
|
||
"execution_count": 121,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data_in_common"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1af245aa-44a7-453b-90f9-0c4bcc415cd0",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Investigate errors from data construction for company 6"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 108,
|
||
"id": "538a5ca2-a50d-4726-93eb-c2b0d0ab8400",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"directory_path = '6'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 143,
|
||
"id": "1ca3fb71-930a-441c-b35b-b98bca780606",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_6/customerplus_cleaned.csv\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df_customerplus_clean = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
|
||
"df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
|
||
"df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 144,
|
||
"id": "2ad3052c-e9e6-4ef9-abe2-4b8b2306a2b9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"max_date = pd.to_datetime(final_date, utc = True, format = 'ISO8601') \n",
|
||
"end_features_date = pd.to_datetime(end_of_features, utc = True, format = 'ISO8601')\n",
|
||
"min_date = pd.to_datetime(start_date, utc = True, format = 'ISO8601')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 128,
|
||
"id": "146999f2-ab92-4b7c-8c57-2e3ac8c4dd88",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 133,
|
||
"id": "7448a7b9-3edf-4177-9df2-a260ebbee45e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Timestamp('2022-06-15 00:00:00+0000', tz='UTC')"
|
||
]
|
||
},
|
||
"execution_count": 133,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"end_features_date"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 136,
|
||
"id": "d8e954ab-65d4-4f36-8410-69bf664773a7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Shape campaigns_information : (1333010, 8)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>opened_at</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" <th>delivered_at</th>\n",
|
||
" <th>campaign_name</th>\n",
|
||
" <th>campaign_service_id</th>\n",
|
||
" <th>campaign_sent_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>2022-08-02 18:31:33+00:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Adhérents non ré-engagés</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>26135</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>2022-08-02 18:31:34+00:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Adhérents non ré-engagés</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3876</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>2022-08-02 18:31:35+00:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Adhérents non ré-engagés</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>26226</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>2022-08-02 18:31:35+00:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Adhérents non ré-engagés</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>25349</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>2022-08-02 18:31:34+00:00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Adhérents non ré-engagés</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>2022-08-02 18:31:36+00:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id opened_at sent_at delivered_at \\\n",
|
||
"0 1 38 NaT 2022-08-02 18:31:33+00:00 NaN \n",
|
||
"1 2 26135 NaT 2022-08-02 18:31:34+00:00 NaN \n",
|
||
"2 3 3876 NaT 2022-08-02 18:31:35+00:00 NaN \n",
|
||
"3 4 26226 NaT 2022-08-02 18:31:35+00:00 NaN \n",
|
||
"4 5 25349 NaT 2022-08-02 18:31:34+00:00 NaN \n",
|
||
"\n",
|
||
" campaign_name campaign_service_id campaign_sent_at \n",
|
||
"0 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||
"1 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||
"2 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||
"3 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 \n",
|
||
"4 Adhérents non ré-engagés 15 2022-08-02 18:31:36+00:00 "
|
||
]
|
||
},
|
||
"execution_count": 136,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Shape campaigns_information : \", df_campaigns_information.shape)\n",
|
||
"df_campaigns_information.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 134,
|
||
"id": "93eceaf1-ce4c-4dfa-9c51-4fd016d09fc5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Timestamp('2022-08-02 18:31:33+0000', tz='UTC')"
|
||
]
|
||
},
|
||
"execution_count": 134,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_campaigns_information['sent_at'].min()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 137,
|
||
"id": "ea50cab4-1dae-4efe-ae3c-22b6f9ad1d26",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Timestamp('2023-11-07 10:08:16+0000', tz='UTC')"
|
||
]
|
||
},
|
||
"execution_count": 137,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_campaigns_information['sent_at'].max()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 127,
|
||
"id": "dcb87bc9-caf5-4655-9cfa-4a3dad504bac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>opened_at</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" <th>delivered_at</th>\n",
|
||
" <th>campaign_name</th>\n",
|
||
" <th>campaign_service_id</th>\n",
|
||
" <th>campaign_sent_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [id, customer_id, opened_at, sent_at, delivered_at, campaign_name, campaign_service_id, campaign_sent_at]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 127,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Filtre de la base df_campaigns_information\n",
|
||
"df_campaigns_information = df_campaigns_information[(df_campaigns_information['sent_at'] <= end_features_date) & (df_campaigns_information['sent_at'] >= min_date)]\n",
|
||
"df_campaigns_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 145,
|
||
"id": "abe22e09-a041-4349-be8f-b0784f2f0a98",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>supplier_name</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>49</th>\n",
|
||
" <td>91401</td>\n",
|
||
" <td>108392</td>\n",
|
||
" <td>1259025.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>caisse</td>\n",
|
||
" <td>2022-02-27 13:44:10.690000+00:00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>ligue 1 uber eats</td>\n",
|
||
" <td>stade de l'aube</td>\n",
|
||
" <td>honneur basse</td>\n",
|
||
" <td>olympique de marseille</td>\n",
|
||
" <td>saison 2021-2022</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>117</th>\n",
|
||
" <td>535527</td>\n",
|
||
" <td>31304</td>\n",
|
||
" <td>136629.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>adhésion</td>\n",
|
||
" <td>2022-04-28 15:47:52.790000+00:00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>ligue 1 uber eats</td>\n",
|
||
" <td>stade de l'aube</td>\n",
|
||
" <td>honneur basse</td>\n",
|
||
" <td>ac ajaccio</td>\n",
|
||
" <td>saison 2022-2023</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>274</th>\n",
|
||
" <td>547400</td>\n",
|
||
" <td>192</td>\n",
|
||
" <td>140477.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>adhésion</td>\n",
|
||
" <td>2022-04-28 15:47:54.053000+00:00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>ligue 1 uber eats</td>\n",
|
||
" <td>stade de l'aube</td>\n",
|
||
" <td>honneur basse</td>\n",
|
||
" <td>rc strasbourg</td>\n",
|
||
" <td>saison 2022-2023</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>304</th>\n",
|
||
" <td>84413</td>\n",
|
||
" <td>31388</td>\n",
|
||
" <td>20259.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>adhésion</td>\n",
|
||
" <td>2021-08-03 13:45:01.603000+00:00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>ligue 1 uber eats</td>\n",
|
||
" <td>stade de l'aube</td>\n",
|
||
" <td>vitoux haute</td>\n",
|
||
" <td>olympique de marseille</td>\n",
|
||
" <td>saison 2021-2022</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>311</th>\n",
|
||
" <td>407271</td>\n",
|
||
" <td>3265</td>\n",
|
||
" <td>90527.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>web [adhésion]</td>\n",
|
||
" <td>2022-05-26 09:15:40.993000+00:00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>ligue 1 uber eats</td>\n",
|
||
" <td>stade de l'aube</td>\n",
|
||
" <td>champagne basse</td>\n",
|
||
" <td>stade brestois 29</td>\n",
|
||
" <td>saison 2022-2023</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
||
"49 91401 108392 1259025.0 4 caisse \n",
|
||
"117 535527 31304 136629.0 4 adhésion \n",
|
||
"274 547400 192 140477.0 4 adhésion \n",
|
||
"304 84413 31388 20259.0 4 adhésion \n",
|
||
"311 407271 3265 90527.0 4 web [adhésion] \n",
|
||
"\n",
|
||
" purchase_date amount is_full_price \\\n",
|
||
"49 2022-02-27 13:44:10.690000+00:00 0.0 False \n",
|
||
"117 2022-04-28 15:47:52.790000+00:00 0.0 False \n",
|
||
"274 2022-04-28 15:47:54.053000+00:00 0.0 False \n",
|
||
"304 2021-08-03 13:45:01.603000+00:00 0.0 False \n",
|
||
"311 2022-05-26 09:15:40.993000+00:00 0.0 False \n",
|
||
"\n",
|
||
" name_event_types name_facilities name_categories \\\n",
|
||
"49 ligue 1 uber eats stade de l'aube honneur basse \n",
|
||
"117 ligue 1 uber eats stade de l'aube honneur basse \n",
|
||
"274 ligue 1 uber eats stade de l'aube honneur basse \n",
|
||
"304 ligue 1 uber eats stade de l'aube vitoux haute \n",
|
||
"311 ligue 1 uber eats stade de l'aube champagne basse \n",
|
||
"\n",
|
||
" name_events name_seasons \n",
|
||
"49 olympique de marseille saison 2021-2022 \n",
|
||
"117 ac ajaccio saison 2022-2023 \n",
|
||
"274 rc strasbourg saison 2022-2023 \n",
|
||
"304 olympique de marseille saison 2021-2022 \n",
|
||
"311 stade brestois 29 saison 2022-2023 "
|
||
]
|
||
},
|
||
"execution_count": 145,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Filtre de la base df_products_purchased_reduced\n",
|
||
"df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]\n",
|
||
"df_products_purchased_reduced.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 150,
|
||
"id": "ae7ef3a6-5b42-4a3c-a108-fec9f2ec4d32",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array(['caisse', 'adhésion', 'web [adhésion]', 'web [grand public]',\n",
|
||
" 'itr ticketmaster', 'itr fnac', nan, 'decathlon', 'boutique web',\n",
|
||
" 'boutique officielle'], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 150,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_products_purchased_reduced[\"supplier_name\"].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 151,
|
||
"id": "942f58a5-8ed4-4b18-a7a2-bd296447fa6a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# KPI sur le comportement d'achat\n",
|
||
"tickets_information_copy = df_products_purchased_reduced.copy()\n",
|
||
"# Dummy : Canal de vente en ligne\n",
|
||
"liste_mots = ['en ligne', 'internet', 'web', 'net', 'vad', 'online'] # vad = vente à distance\n",
|
||
"tickets_information_copy['vente_internet'] = tickets_information_copy['supplier_name'].fillna('').str.contains('|'.join(liste_mots), case=False).astype(int)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "658b57cd-4fb8-4552-a582-972144b2af1c",
|
||
"metadata": {},
|
||
"source": [
|
||
"`tickets_information_copy['vente_internet']` is now computed correctly: missing `supplier_name` values are replaced by an empty string before the keyword search."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "99a75c34-f393-433a-b3c2-dc3f6f2f3e7e",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Investigate train and test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "970302f5-4de2-46b4-a1ce-a5396f5330ab",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def display_databases(directory_path, file_name):\n",
"    \"\"\"\n",
"    Read one CSV file of the Generalization folder from the S3 storage.\n",
"\n",
"    Args:\n",
"        directory_path: sub-folder of 'projet-bdc2324-team1/Generalization'.\n",
"        file_name: name of the file, without the '.csv' extension.\n",
"\n",
"    Returns:\n",
"        The content of the file as a DataFrame.\n",
"\n",
"    NOTE(review): earlier cells call a `display_databases` that accepts a\n",
"    `datetime_col` argument; this definition replaces it in the namespace.\n",
"    Consider a distinct name.\n",
"    \"\"\"\n",
"    file_path = \"/\".join([\"projet-bdc2324-team1\", \"Generalization\", directory_path, file_name + \".csv\"])\n",
"    print(\"File path : \", file_path)\n",
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
"        return pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "f5bfae82-04aa-44e1-9869-3f4fd5736b41",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/Generalization/sport/Train_set.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>5_6046652</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>5_3789159</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>5_5991148</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>5_3848065</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5_6154495</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 40 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 5_6046652 0.0 0.0 0.0 0.0 \n",
|
||
"1 5_3789159 0.0 0.0 0.0 0.0 \n",
|
||
"2 5_5991148 0.0 0.0 0.0 0.0 \n",
|
||
"3 5_3848065 0.0 0.0 0.0 0.0 \n",
|
||
"4 5_6154495 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 0.0 0.0 \n",
|
||
"1 0.0 0.0 0.0 \n",
|
||
"2 0.0 0.0 0.0 \n",
|
||
"3 0.0 0.0 0.0 \n",
|
||
"4 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
|
||
"0 0.0 0.0 ... af other \n",
|
||
"1 0.0 0.0 ... fr male \n",
|
||
"2 0.0 0.0 ... af other \n",
|
||
"3 0.0 0.0 ... fr male \n",
|
||
"4 0.0 0.0 ... af other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"0 0 0 1 0.0 0.0 \n",
|
||
"1 0 1 0 1.0 0.0 \n",
|
||
"2 0 0 1 0.0 0.0 \n",
|
||
"3 0 1 0 1.0 0.0 \n",
|
||
"4 0 0 1 0.0 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \n",
|
||
"0 0.0 0 0.0 \n",
|
||
"1 0.0 0 0.0 \n",
|
||
"2 0.0 0 0.0 \n",
|
||
"3 0.0 0 0.0 \n",
|
||
"4 0.0 0 0.0 \n",
|
||
"\n",
|
||
"[5 rows x 40 columns]"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_sport = display_databases('sport', 'Train_set').fillna(0)\n",
|
||
"train_sport.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "56d5b12e-45e8-4312-869d-bde4d24900b6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"shape : (426449, 40)\n",
|
||
"number of na explained variable : 369102\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print('shape : ', train_sport.shape) \n",
|
||
"print('number of na explained variable : ', train_sport['y_has_purchased'].isna().sum())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "13bff83a-e931-4286-a3f2-1382462703f4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<Axes: xlabel='y_has_purchased', ylabel='count'>"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAGxCAYAAACgDPi4AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8w0lEQVR4nO3df1RUdeL/8deEMpLCRCE/xlh/fEpWwtwNOopWlAlogVnb6i41ySdjazE5hGQf61urfkoqf7XpZ61tK1djlz67RltpfCBLjFX8wTK7YOR6Wg1cQaxgELKB8H7/6HBPI2pK1xB7Ps655zj3vube99w9s7563ztXm2EYhgAAAPCtXdDbAwAAADhfUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCL9ensA3zfHjh3TwYMHFRgYKJvN1tvDAQAAp8EwDB05ckROp1MXXHDyeSmK1Xfs4MGDioyM7O1hAACAHqirq9Oll1560u0Uq+9YYGCgpK/+hwkKCurl0QAAgNPR0tKiyMhI8+/xk6FYfce6Lv8FBQVRrAAA6GO+6TYebl4HAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi/Tr7QHAerEPru3tIQDnpIold/X2EACc55ixAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALNKrxWr16tW68sorFRQUpKCgIMXHx+vtt982t6enp8tms/ks48aN89mH1+vVnDlzFBISooEDB2rq1Kk6cOCAT6apqUkul0sOh0MOh0Mul0vNzc0+mdraWqWmpmrgwIEKCQlRVlaW2tvbfTJVVVVKSEhQQECAhgwZokWLFskwDGtPCgAA6LN6tVhdeumlevLJJ7Vr1y7t2rVLEydO1C233KLdu3ebmcmTJ6u+vt5cNm7c6LOP7OxsFRYWqqCgQGVlZWptbVVKSoo6OzvNTFpamtxut4qKilRUVCS32y2Xy2Vu7+zs1M0336y2tjaVlZWpoKBA69ev19y5c81MS0uLEhMT5XQ6tXPnTq1cuVJLly7V8uXLz+IZAgAAfYnNOMemXC6++GItWbJEs2bNUnp6upqbm/X666+fMOvxeDR48GCtW7dOM2bMkCQdPHhQkZGR2rhxo5KTk1VTU6Po6GiVl5dr7NixkqTy8nLFx8frww8/VFRUlN5++22lpKSorq5OTqdTklRQUKD09HQ1NjYqKChIq1ev1vz583Xo0CHZ7XZJ0pNPPqmVK1fqwIEDstlsp/X5Wlpa5HA45PF4FBQU9C3P1onFPrj2rOwX6OsqltzV20MA0Eed7t/f58w9Vp2dnSooKFBbW5vi4+PN9Zs3b1ZoaKhGjhypjIwMNTY2mtsqKirU0dGhpKQkc53T6VRMTIy2bt0qSdq2bZscDodZqiRp3LhxcjgcPpmYmBizVElScnKyvF6vKioqzExCQoJZqroyBw8e1P79+609GQAAoE/q19sDqKqqUnx8vL744gsNGjRIhYWFio6OliRNmTJFP/3pTzV06FDt27dPjz76qCZOnKiKigrZ7XY1NDTI399fwcHBPvsMCwtTQ0ODJKmhoUGhoaHdjhsaGuqTCQsL89keHBwsf39/n8ywYcO6Hadr2/Dhw0/4+bxer7xer/m6paXldE8NAADoY3q9WEVFRcntdqu5uVnr16/XzJkzVVpaqujoaPPyniTFxMQoLi5OQ4cO1YYNG3TbbbeddJ+GYfhcmjvRZTorMl1XUU91GTAvL08LFy486XYAAHD+6PVLgf7+/rrssssUFxenvLw8jRkzRr/+9a9PmI2IiNDQoUO1d+9eSVJ4eLja29vV1N
Tkk2tsbDRnk8LDw3Xo0KFu+zp8+LBPpmtmqktTU5M6OjpOmem6LHn8bNfXzZ8/Xx6Px1zq6upOmgUAAH1brxer4xmG4XPp7Os+/fRT1dXVKSIiQpIUGxur/v37q6SkxMzU19erurpa48ePlyTFx8fL4/Fox44dZmb79u3yeDw+merqatXX15uZ4uJi2e12xcbGmpktW7b4PIKhuLhYTqez2yXCr7Pb7ebjJLoWAABwfurVYvXwww/r/fff1/79+1VVVaVHHnlEmzdv1h133KHW1lbl5uZq27Zt2r9/vzZv3qzU1FSFhITo1ltvlSQ5HA7NmjVLc+fO1aZNm1RZWak777xTo0eP1qRJkyRJo0aN0uTJk5WRkaHy8nKVl5crIyNDKSkpioqKkiQlJSUpOjpaLpdLlZWV2rRpk3Jzc5WRkWEWobS0NNntdqWnp6u6ulqFhYVavHixcnJyTvsXgQAA4PzWq/dYHTp0SC6XS/X19XI4HLryyitVVFSkxMREHT16VFVVVVq7dq2am5sVERGhG264Qa+++qoCAwPNfaxYsUL9+vXT9OnTdfToUd14441as2aN/Pz8zEx+fr6ysrLMXw9OnTpVq1atMrf7+flpw4YNyszM1IQJExQQEKC0tDQtXbrUzDgcDpWUlGj27NmKi4tTcHCwcnJylJOT8x2cKQAA0Becc8+xOt/xHCug9/AcKwA91eeeYwUAANDXUawAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACL9GqxWr16ta688koFBQUpKChI8fHxevvtt83thmFowYIFcjqdCggI0PXXX6/du3f77MPr9WrOnDkKCQnRwIEDNXXqVB04cMAn09TUJJfLJYfDIYfDIZfLpebmZp9MbW2tUlNTNXDgQIWEhCgrK0vt7e0+maqqKiUkJCggIEBDhgzRokWLZBiGtScFAAD0Wb1arC699FI9+eST2rVrl3bt2qWJEyfqlltuMcvT008/reXLl2vVqlXauXOnwsPDlZiYqCNHjpj7yM7OVmFhoQoKClRWVqbW1lalpKSos7PTzKSlpcntdquoqEhFRUVyu91yuVzm9s7OTt18881qa2tTWVmZCgoKtH79es2dO9fMtLS0KDExUU6nUzt37tTKlSu1dOlSLV++/Ds4UwAAoC+wGefYlMvFF1+sJUuW6O6775bT6VR2drYeeughSV/NToWFhempp57SvffeK4/Ho8GDB2vdunWaMWOGJOngwYOKjIzUxo0blZycrJqaGkVHR6u8vFxjx46VJJWXlys+Pl4ffvihoqKi9PbbbyslJUV1dXVyOp2SpIKCAqWnp6uxsVFBQUFavXq15s+fr0OHDslut0uSnnzySa1cuVIHDhyQzWY7rc/X0tIih8Mhj8ejoKAgq0+fJCn2wbVnZb9AX1ex5K7eHgKAPup0//4+Z+6x6uzsVEFBgdra2hQfH699+/apoaFBSUlJZsZutyshIUFbt26VJFVUVKijo8Mn43Q6FRMTY2a2bdsmh8NhlipJGjdunBwOh08mJibGLFWSlJycLK/Xq4qKCjOTkJBglqquzMGDB7V///6Tfi6v16uWlhafBQAAnJ96vVhVVVVp0KBBstvtuu+++1RYWKjo6Gg1NDRIksLCwnzyYWFh5raGhgb5+/srODj4lJnQ0NBuxw0NDfXJHH+c4OBg+fv7nzLT9borcyJ5eXnmvV0Oh0ORkZGnPiEAAKDP6vViFRUVJb
fbrfLycv3yl7/UzJkz9cEHH5jbj7/EZhjGN152Oz5zorwVma6rqKcaz/z58+XxeMylrq7ulGMHAAB9V68XK39/f1122WWKi4tTXl6exowZo1//+tcKDw+X1H02qLGx0ZwpCg8PV3t7u5qamk6ZOXToULfjHj582Cdz/HGamprU0dFxykxjY6Ok7rNqX2e3281fPXYtAADg/NTrxep4hmHI6/Vq+PDhCg8PV0lJibmtvb1dpaWlGj9+vCQpNjZW/fv398nU19erurrazMTHx8vj8WjHjh1mZvv27fJ4PD6Z6upq1dfXm5ni4mLZ7XbFxsaamS1btvg8gqG4uFhOp1PDhg2z/kQAAIA+p1eL1cMPP6z3339f+/fvV1VVlR555BFt3rxZd9xxh2w2m7Kzs7V48WIVFhaqurpa6enpuvDCC5WWliZJcjgcmjVrlubOnatNmzapsrJSd955p0aPHq1JkyZJkkaNGqXJkycrIyND5eXlKi8vV0ZGhlJSUhQVFSVJSkpKUnR0tFwulyorK7Vp0ybl5uYqIyPDnGFKS0uT3W5Xenq6qqurVVhYqMWLFysnJ+e0fxEIAADOb/168+CHDh2Sy+VSfX29HA6HrrzyShUVFSkxMVGSNG/ePB09elSZmZlqamrS2LFjVVxcrMDAQHMfK1asUL9+/TR9+nQdPXpUN954o9asWSM/Pz8zk5+fr6ysLPPXg1OnTtWqVavM7X5+ftqwYYMyMzM1YcIEBQQEKC0tTUuXLjUzDodDJSUlmj17tuLi4hQcHKycnBzl5OSc7dMEAAD6iHPuOVbnO55jBfQenmMFoKf63HOsAAAA+jqKFQAAgEUoVgAAABahWAEAAFiEYgUAAGARihUAAIBFKFYAAAAWoVgBAABYhGIFAABgEYoVAACARShWAAAAFqFYAQAAWIRiBQAAYBGKFQAAgEUoVgAAABahWAEAAFiEYgUAAGARihUAAIBFKFYAAAAWoVgBAABYhGIFAABgEYoVAACARShWAAAAFqFYAQAAWIRiBQAAYBGKFQAAgEUoVgAAABahWAEAAFiEYgUAAGARihUAAIBFKFYAAAAWoVgBAABYhGIFAABgEYoVAACARShWAAAAFqFYAQAAWIRiBQAAYJFeLVZ5eXm6+uqrFRgYqNDQUE2bNk179uzxyaSnp8tms/ks48aN88l4vV7NmTNHISEhGjhwoKZOnaoDBw74ZJqamuRyueRwOORwOORyudTc3OyTqa2tVWpqqgYOHKiQkBBlZWWpvb3dJ1NVVaWEhAQFBARoyJAhWrRokQzDsO6kAACAPqtXi1Vpaalmz56t8vJylZSU6Msvv1RSUpLa2tp8cpMnT1Z9fb25bNy40Wd7dna2CgsLVVBQoLKyMrW2tiolJUWdnZ1mJi0tTW63W0VFRSoqKpLb7ZbL5TK3d3Z26uabb1ZbW5vKyspUUFCg9evXa+7cuWampaVFiYmJcjqd2rlzp1auXKmlS5dq+fLlZ+kMAQCAvqRfbx68qKjI5/XLL7+s0NBQVVRU6LrrrjPX2+12hYeHn3AfHo9HL774otatW6dJkyZJkl555RVFRkbqnXfeUXJysmpqalRUVKTy8nKNHTtWkvTCCy8oPj5ee/bsUVRUlIqLi/XBBx+orq5OTqdTkrRs2TKlp6friSeeUFBQkPLz8/XFF19ozZo1stvtiomJ0T//+U8tX75cOTk5stlsZ+M0AQCAPuKcusfK4/FIki6++GKf9Zs3b1ZoaKhGjhypjIwMNTY2mtsqKirU0dGhpKQkc53T6VRMTIy2bt0qSdq2bZscDodZqiRp3LhxcjgcPpmYmBizVElScnKyvF6vKioqzExCQoLsdrtP5uDBg9q/f/8JP5PX61VLS4vPAgAAzk/nTLEyDEM5OTm65pprFBMTY66fMmWK8vPz9e6772rZsmXauXOnJk6cKK/XK0lqaGiQv7+/goODffYXFhamhoYGMxMaGtrtmKGhoT6ZsLAwn+
3BwcHy9/c/ZabrdVfmeHl5eeZ9XQ6HQ5GRkad9TgAAQN/Sq5cCv+7+++/XP/7xD5WVlfmsnzFjhvnnmJgYxcXFaejQodqwYYNuu+22k+7PMAyfS3MnukxnRabrxvWTXQacP3++cnJyzNctLS2UKwAAzlPnxIzVnDlz9MYbb+i9997TpZdeespsRESEhg4dqr1790qSwsPD1d7erqamJp9cY2OjOZsUHh6uQ4cOddvX4cOHfTLHzzo1NTWpo6PjlJmuy5LHz2R1sdvtCgoK8lkAAMD5qVeLlWEYuv/++/Xaa6/p3Xff1fDhw7/xPZ9++qnq6uoUEREhSYqNjVX//v1VUlJiZurr61VdXa3x48dLkuLj4+XxeLRjxw4zs337dnk8Hp9MdXW16uvrzUxxcbHsdrtiY2PNzJYtW3wewVBcXCyn06lhw4b1/EQAAIDzQq8Wq9mzZ+uVV17RH/7wBwUGBqqhoUENDQ06evSoJKm1tVW5ubnatm2b9u/fr82bNys1NVUhISG69dZbJUkOh0OzZs3S3LlztWnTJlVWVurOO+/U6NGjzV8Jjho1SpMnT1ZGRobKy8tVXl6ujIwMpaSkKCoqSpKUlJSk6OhouVwuVVZWatOmTcrNzVVGRoY5y5SWlia73a709HRVV1ersLBQixcv5heBAABAUi8Xq9WrV8vj8ej6669XRESEubz66quSJD8/P1VVVemWW27RyJEjNXPmTI0cOVLbtm1TYGCguZ8VK1Zo2rRpmj59uiZMmKALL7xQb775pvz8/MxMfn6+Ro8eraSkJCUlJenKK6/UunXrzO1+fn7asGGDBgwYoAkTJmj69OmaNm2ali5damYcDodKSkp04MABxcXFKTMzUzk5OT73UAEAgO8vm8Fjw79TLS0tcjgc8ng8Z+1+q9gH156V/QJ9XcWSu3p7CAD6qNP9+/ucuHkdAADgfECxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACzSo2I1ceJENTc3d1vf0tKiiRMnftsxAQAA9Ek9KlabN29We3t7t/VffPGF3n///W89KAAAgL6o35mE//GPf5h//uCDD9TQ0GC+7uzsVFFRkYYMGWLd6AAAAPqQMypWP/rRj2Sz2WSz2U54yS8gIEArV660bHAAAAB9yRkVq3379skwDI0YMUI7duzQ4MGDzW3+/v4KDQ2Vn5+f5YMEAADoC86oWA0dOlSSdOzYsbMyGAAAgL7sjIrV1/3zn//U5s2b1djY2K1oPfbYY996YAAAAH1Nj34V+MILLyg6OlqPPfaY/vznP6uwsNBcXn/99dPeT15enq6++moFBgYqNDRU06ZN0549e3wyhmFowYIFcjqdCggI0PXXX6/du3f7ZLxer+bMmaOQkBANHDhQU6dO1YEDB3wyTU1Ncrlccjgccjgccrlc3R4ZUVtbq9TUVA0cOFAhISHKysrq9uvHqqoqJSQkKCAgQEOGDNGiRYtkGMZpf2YAAHD+6lGxevzxx/XEE0+ooaFBbrdblZWV5vK3v/3ttPdTWlqq2bNnq7y8XCUlJfryyy+VlJSktrY2M/P0009r+fLlWrVqlXbu3Knw8HAlJibqyJEjZiY7O1uFhYUqKChQWVmZWltblZKSos7OTjOTlpYmt9utoqIiFRUVye12y+Vymds7Ozt18803q62tTWVlZSooKND69es1d+5cM9PS0qLExEQ5nU7t3LlTK1eu1NKlS7V8+fKenEYAAHCesRk9mG4JCgqS2+3WiBEjLB3M4cOHFRoaqtLSUl133XUyDENOp1PZ2dl66KGHJH
01OxUWFqannnpK9957rzwejwYPHqx169ZpxowZkqSDBw8qMjJSGzduVHJysmpqahQdHa3y8nKNHTtWklReXq74+Hh9+OGHioqK0ttvv62UlBTV1dXJ6XRKkgoKCpSenq7GxkYFBQVp9erVmj9/vg4dOiS73S5JevLJJ7Vy5UodOHBANpvtGz9jS0uLHA6HPB6PgoKCLD1/XWIfXHtW9gv0dRVL7urtIQDoo0737+8ezVj99Kc/VXFxcY8HdzIej0eSdPHFF0v66leIDQ0NSkpKMjN2u10JCQnaunWrJKmiokIdHR0+GafTqZiYGDOzbds2ORwOs1RJ0rhx4+RwOHwyMTExZqmSpOTkZHm9XlVUVJiZhIQEs1R1ZQ4ePKj9+/dbeSoAAEAf1KOb1y+77DI9+uijKi8v1+jRo9W/f3+f7VlZWWe8T8MwlJOTo2uuuUYxMTGSZD6ANCwszCcbFhamjz/+2Mz4+/srODi4W6br/Q0NDQoNDe12zNDQUJ/M8ccJDg6Wv7+/T2bYsGHdjtO1bfjw4d2O4fV65fV6zdctLS2nOAsAAKAv61Gx+u1vf6tBgwaptLRUpaWlPttsNluPitX999+vf/zjHyorK+u27fhLbIZhfONlt+MzJ8pbkem6knqy8eTl5WnhwoWnHCsAADg/9KhY7du3z9JBzJkzR2+88Ya2bNmiSy+91FwfHh4u6avZoIiICHN9Y2OjOVMUHh6u9vZ2NTU1+cxaNTY2avz48Wbm0KFD3Y57+PBhn/1s377dZ3tTU5M6Ojp8Ml//Z3y6jiN1n1XrMn/+fOXk5JivW1paFBkZearTAQAA+qge3WNlFcMwdP/99+u1117Tu+++2+1S2vDhwxUeHq6SkhJzXXt7u0pLS83SFBsbq/79+/tk6uvrVV1dbWbi4+Pl8Xi0Y8cOM7N9+3Z5PB6fTHV1terr681McXGx7Ha7YmNjzcyWLVt8HsFQXFwsp9PZ7RJhF7vdrqCgIJ8FAACcn3o0Y3X33XefcvtLL710WvuZPXu2/vCHP+gvf/mLAgMDzdkgh8OhgIAA2Ww2ZWdna/Hixbr88st1+eWXa/HixbrwwguVlpZmZmfNmqW5c+fqkksu0cUXX6zc3FyNHj1akyZNkiSNGjVKkydPVkZGhp5//nlJ0i9+8QulpKQoKipKkpSUlKTo6Gi5XC4tWbJEn332mXJzc5WRkWGWobS0NC1cuFDp6el6+OGHtXfvXi1evFiPPfbYaf0iEAAAnN96VKyampp8Xnd0dKi6ulrNzc0n/MeZT2b16tWSpOuvv95n/csvv6z09HRJ0rx583T06FFlZmaqqalJY8eOVXFxsQIDA838ihUr1K9fP02fPl1Hjx7VjTfeqDVr1vj8u4X5+fnKysoyfz04depUrVq1ytzu5+enDRs2KDMzUxMmTFBAQIDS0tK0dOlSM+NwOFRSUqLZs2crLi5OwcHBysnJ8bnUBwAAvr969ByrEzl27JgyMzM1YsQIzZs3z4pdnpd4jhXQe3iOFYCeOqvPsTrhji64QA888IBWrFhh1S4BAAD6FEtvXv/oo4/05ZdfWrlLAACAPqNH91gdf0+RYRiqr6/Xhg0bNHPmTEsGBgAA0Nf0qFhVVlb6vL7gggs0ePBgLVu27Bt/MQgAAHC+6lGxeu+996weBwAAQJ/Xo2LV5fDhw9qzZ49sNptGjhypwYMHWzUuAACAPqdHN6+3tbXp7rvvVkREhK677jpde+21cjqdmjVrlj7//HOrxwgAANAn9KhY5eTkqLS0VG+++aaam5vV3Nysv/zlLyotLdXcuXOtHiMAAECf0KNLgevXr9ef//xnnyem33TTTQoICND06dPNJ6oDAAB8n/Roxurzzz9XWFhYt/WhoaFcCgQAAN9bPSpW8fHx+tWvfqUvvvjCXHf06FEtXLhQ8fHxlg0OAACgL+nRpcBnnnlGU6ZM0aWXXqoxY8bIZrPJ7XbLbreruLjY6jECAAD0CT0qVq
NHj9bevXv1yiuv6MMPP5RhGPrZz36mO+64QwEBAVaPEQAAoE/oUbHKy8tTWFiYMjIyfNa/9NJLOnz4sB566CFLBgcAANCX9Ogeq+eff14//OEPu62/4oor9Nxzz33rQQEAAPRFPSpWDQ0NioiI6LZ+8ODBqq+v/9aDAgAA6It6VKwiIyP117/+tdv6v/71r3I6nd96UAAAAH1Rj+6xuueee5Sdna2Ojg5NnDhRkrRp0ybNmzePJ68DAIDvrR4Vq3nz5umzzz5TZmam2tvbJUkDBgzQQw89pPnz51s6QAAAgL6iR8XKZrPpqaee0qOPPqqamhoFBATo8ssvl91ut3p8AAAAfUaPilWXQYMG6eqrr7ZqLAAAAH1aj25eBwAAQHcUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALBIrxarLVu2KDU1VU6nUzabTa+//rrP9vT0dNlsNp9l3LhxPhmv16s5c+YoJCREAwcO1NSpU3XgwAGfTFNTk1wulxwOhxwOh1wul5qbm30ytbW1Sk1N1cCBAxUSEqKsrCy1t7f7ZKqqqpSQkKCAgAANGTJEixYtkmEYlp0PAADQt/VqsWpra9OYMWO0atWqk2YmT56s+vp6c9m4caPP9uzsbBUWFqqgoEBlZWVqbW1VSkqKOjs7zUxaWprcbreKiopUVFQkt9stl8tlbu/s7NTNN9+strY2lZWVqaCgQOvXr9fcuXPNTEtLixITE+V0OrVz506tXLlSS5cu1fLlyy08IwAAoC/r15sHnzJliqZMmXLKjN1uV3h4+Am3eTwevfjii1q3bp0mTZokSXrllVcUGRmpd955R8nJyaqpqVFRUZHKy8s1duxYSdILL7yg+Ph47dmzR1FRUSouLtYHH3yguro6OZ1OSdKyZcuUnp6uJ554QkFBQcrPz9cXX3yhNWvWyG63KyYmRv/85z+1fPly5eTkyGazWXhmAABAX3TO32O1efNmhYaGauTIkcrIyFBjY6O5raKiQh0dHUpKSjLXOZ1OxcTEaOvWrZKkbdu2yeFwmKVKksaNGyeHw+GTiYmJMUuVJCUnJ8vr9aqiosLMJCQkyG63+2QOHjyo/fv3n5XPDgAA+pZzulhNmTJF+fn5evfdd7Vs2TLt3LlTEydOlNfrlSQ1NDTI399fwcHBPu8LCwtTQ0ODmQkNDe2279DQUJ9MWFiYz/bg4GD5+/ufMtP1uitzIl6vVy0tLT4LAAA4P/XqpcBvMmPGDPPPMTExiouL09ChQ7VhwwbddtttJ32fYRg+l+ZOdJnOikzXjeunugyYl5enhQsXnnQ7AAA4f5zTM1bHi4iI0NChQ7V3715JUnh4uNrb29XU1OSTa2xsNGeTwsPDdejQoW77Onz4sE/m+FmnpqYmdXR0nDLTdVny+Jmsr5s/f748Ho+51NXVnclHBgAAfUifKlaffvqp6urqFBERIUmKjY1V//79VVJSYmbq6+tVXV2t8ePHS5Li4+Pl8Xi0Y8cOM7N9+3Z5PB6fTHV1terr681McXGx7Ha7YmNjzcyWLVt8HsFQXFwsp9OpYcOGnXTMdrtdQUFBPgsAADg/9Wqxam1tldvtltvtliTt27dPbrdbtbW1am1tVW5urrZt26b9+/dr8+bNSk1NVUhIiG699VZJksPh0KxZszR37lxt2rRJlZWVuvPOOzV69GjzV4KjRo3S5MmTlZGRofLycpWXlysjI0MpKSmKioqSJCUlJSk6Oloul0uVlZXatGmTcnNzlZGRYRahtLQ02e12paenq7q6WoWFhVq8eDG/CAQAAKZevcdq165duuGGG8zXOTk5kqSZM2dq9erVqqqq0tq1a9Xc3KyIiAjdcMMNevXVVxUYGGi+Z8WKFerXr5+mT5+uo0eP6sYbb9SaNWvk5+dnZvLz85
WVlWX+enDq1Kk+z87y8/PThg0blJmZqQkTJiggIEBpaWlaunSpmXE4HCopKdHs2bMVFxen4OBg5eTkmGMGAACwGTw6/DvV0tIih8Mhj8dz1i4Lxj649qzsF+jrKpbc1dtDANBHne7f333qHisAAIBzGcUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwSK8Wqy1btig1NVVOp1M2m02vv/66z3bDMLRgwQI5nU4FBATo+uuv1+7du30yXq9Xc+bMUUhIiAYOHKipU6fqwIEDPpmmpia5XC45HA45HA65XC41Nzf7ZGpra5WamqqBAwcqJCREWVlZam9v98lUVVUpISFBAQEBGjJkiBYtWiTDMCw7HwAAoG/r1WLV1tamMWPGaNWqVSfc/vTTT2v58uVatWqVdu7cqfDwcCUmJurIkSNmJjs7W4WFhSooKFBZWZlaW1uVkpKizs5OM5OWlia3262ioiIVFRXJ7XbL5XKZ2zs7O3XzzTerra1NZWVlKigo0Pr16zV37lwz09LSosTERDmdTu3cuVMrV67U0qVLtXz58rNwZgAAQF9kM86RKRebzabCwkJNmzZN0lezVU6nU9nZ2XrooYckfTU7FRYWpqeeekr33nuvPB6PBg8erHXr1mnGjBmSpIMHDyoyMlIbN25UcnKyampqFB0drfLyco0dO1aSVF5ervj4eH344YeKiorS22+/rZSUFNXV1cnpdEqSCgoKlJ6ersbGRgUFBWn16tWaP3++Dh06JLvdLkl68skntXLlSh04cEA2m+20PmdLS4scDoc8Ho+CgoKsPIWm2AfXnpX9An1dxZK7ensIAPqo0/37+5y9x2rfvn1qaGhQUlKSuc5utyshIUFbt26VJFVUVKijo8Mn43Q6FRMTY2a2bdsmh8NhlipJGjdunBwOh08mJibGLFWSlJycLK/Xq4qKCjOTkJBglqquzMGDB7V///6Tfg6v16uWlhafBQAAnJ/O2WLV0NAgSQoLC/NZHxYWZm5raGiQv7+/goODT5kJDQ3ttv/Q0FCfzPHHCQ4Olr+//ykzXa+7MieSl5dn3tvlcDgUGRl56g8OAAD6rHO2WHU5/hKbYRjfeNnt+MyJ8lZkuq6inmo88+fPl8fjMZe6urpTjh0AAPRd52yxCg8Pl9R9NqixsdGcKQoPD1d7e7uamppOmTl06FC3/R8+fNgnc/xxmpqa1NHRccpMY2OjpO6zal9nt9sVFBTkswAAgPPTOVushg8frvDwcJWUlJjr2tvbVVpaqvHjx0uSYmNj1b9/f59MfX29qqurzUx8fLw8Ho927NhhZrZv3y6Px+OTqa6uVn19vZkpLi6W3W5XbGysmdmyZYvPIxiKi4vldDo1bNgw608AAADoc3q1WLW2tsrtdsvtdkv66oZ1t9ut2tpa2Ww2ZWdna/HixSosLFR1dbXS09N14YUXKi0tTZLkcDg0a9YszZ07V5s2bVJlZaXuvPNOjR49WpMmTZIkjRo1SpMnT1ZGRobKy8tVXl6ujIwMpaSkKCoqSpKUlJSk6OhouVwuVVZWatOmTcrNzVVGRoY5w5SWlia73a709HRVV1ersLBQixcvVk5Ozmn/IhAAAJzf+vXmwXft2qUbbrjBfJ2TkyNJmjlzptasWaN58+bp6NGjyszMVFNTk8aOHavi4mIFBgaa71mxYoX69eun6dOn6+jRo7rxxhu1Zs0a+fn5mZn8/HxlZWWZvx6cOnWqz7
Oz/Pz8tGHDBmVmZmrChAkKCAhQWlqali5damYcDodKSko0e/ZsxcXFKTg4WDk5OeaYAQAAzpnnWH1f8BwroPfwHCsAPdXnn2MFAADQ11CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAItQrAAAACxCsQIAALAIxQoAAMAiFCsAAACLUKwAAAAs0q+3BwAAOH2xD67t7SEA56SKJXf19hAkMWMFAABgmXO6WC1YsEA2m81nCQ8PN7cbhqEFCxbI6XQqICBA119/vXbv3u2zD6/Xqzlz5igkJEQDBw7U1KlTdeDAAZ9MU1OTXC6XHA6HHA6HXC6XmpubfTK1tbVKTU3VwIEDFRISoqysLLW3t5+1zw4AAPqec7pYSdIVV1yh+vp6c6mqqjK3Pf3001q+fLlWrVqlnTt3Kjw8XImJiTpy5IiZyc7OVmFhoQoKClRWVqbW1lalpKSos7PTzKSlpcntdquoqEhFRUVyu91yuVzm9s7OTt18881qa2tTWVmZCgoKtH79es2dO/e7OQkAAKBPOOfvserXr5/PLFUXwzD0zDPP6JFHHtFtt90mSfr973+vsLAw/eEPf9C9994rj8ejF198UevWrdOkSZMkSa+88ooiIyP1zjvvKDk5WTU1NSoqKlJ5ebnGjh0rSXrhhRcUHx+vPXv2KCoqSsXFxfrggw9UV1cnp9MpSVq2bJnS09P1xBNPKCgo6Ds6GwAA4Fx2zs9Y7d27V06nU8OHD9fPfvYz/etf/5Ik7du3Tw0NDUpKSjKzdrtdCQkJ2rp1qySpoqJCHR0dPhmn06mYmBgzs23bNjkcDrNUSdK4cePkcDh8MjExMWapkqTk5GR5vV5VVFScvQ8PAAD6lHN6xmrs2LFau3atRo4cqUOHDunxxx/X+PHjtXv3bjU0NEiSwsLCfN4TFhamjz/+WJLU0NAgf39/BQcHd8t0vb+hoUGhoaHdjh0aGuqTOf44wcHB8vf3NzMn4/V65fV6zdctLS2n89EBAEAfdE4XqylTpph/Hj16tOLj4/Uf//Ef+v3vf69x48ZJkmw2m897DMPotu54x2dOlO9J5kTy8vK0cOHCU2YAAMD54Zy/FPh1AwcO1OjRo7V3717zvqvjZ4waGxvN2aXw8HC1t7erqanplJlDhw51O9bhw4d9Mscfp6mpSR0dHd1mso43f/58eTwec6mrqzuDTwwAAPqSPlWsvF6vampqFBERoeHDhys8PFwlJSXm9vb2dpWWlmr8+PGSpNjYWPXv398nU19fr+rqajMTHx8vj8ejHTt2mJnt27fL4/H4ZKqrq1VfX29miouLZbfbFRsbe8ox2+12BQUF+SwAAOD8dE5fCszNzVVqaqp+8IMfqLGxUY8//rhaWlo0c+ZM2Ww2ZWdna/Hixbr88st1+eWXa/HixbrwwguVlpYmSXI4HJo1a5bmzp2rSy65RBdffLFyc3M1evRo81eCo0aN0uTJk5WRkaHnn39ekvSLX/xCKSkpioqKkiQlJSUpOjpaLpdLS5Ys0Weffabc3FxlZGRQlAAAgOmcLlYHDhzQz3/+c33yyScaPHiwxo0bp/Lycg0dOlSSNG/ePB09elSZmZlqamrS2LFjVVxcrMDAQHMfK1asUL9+/TR9+nQdPXpUN954o9asWSM/Pz8zk5+fr6ysLPPXg1OnTtWqVavM7X5+ftqwYYMyMzM1YcIEBQQEKC0tTUuXLv2OzgQAAOgLbIZhGL09iO+TlpYWORwOeTyeszbbxb8lBpzYufJviX0bfL+BEzvb3+/T/fu7T91jBQAAcC6jWAEAAF
iEYgUAAGARihUAAIBFKFYAAAAWoVgBAABYhGIFAABgEYoVAACARShWAAAAFqFYAQAAWIRiBQAAYBGKFQAAgEUoVgAAABahWAEAAFiEYgUAAGARihUAAIBFKFYAAAAWoVgBAABYhGIFAABgEYoVAACARShWAAAAFqFYAQAAWIRiBQAAYBGKFQAAgEUoVgAAABahWAEAAFiEYgUAAGARihUAAIBFKFYAAAAWoVgBAABYhGIFAABgEYoVAACARShWAAAAFqFYAQAAWIRiBQAAYBGKFQAAgEUoVgAAABahWPXAb37zGw0fPlwDBgxQbGys3n///d4eEgAAOAdQrM7Qq6++quzsbD3yyCOqrKzUtddeqylTpqi2tra3hwYAAHoZxeoMLV++XLNmzdI999yjUaNG6ZlnnlFkZKRWr17d20MDAAC9jGJ1Btrb21VRUaGkpCSf9UlJSdq6dWsvjQoAAJwr+vX2APqSTz75RJ2dnQoLC/NZHxYWpoaGhhO+x+v1yuv1mq89Ho8kqaWl5ayNs9N79KztG+jLzub37rvC9xs4sbP9/e7av2EYp8xRrHrAZrP5vDYMo9u6Lnl5eVq4cGG39ZGRkWdlbABOzrHyvt4eAoCz5Lv6fh85ckQOh+Ok2ylWZyAkJER+fn7dZqcaGxu7zWJ1mT9/vnJycszXx44d02effaZLLrnkpGUM54+WlhZFRkaqrq5OQUFBvT0cABbi+/39YhiGjhw5IqfTecocxeoM+Pv7KzY2ViUlJbr11lvN9SUlJbrllltO+B673S673e6z7qKLLjqbw8Q5KCgoiP/jBc5TfL+/P041U9WFYnWGcnJy5HK5FBcXp/j4eP32t79VbW2t7ruPSwwAAHzfUazO0IwZM/Tpp59q0aJFqq+vV0xMjDZu3KihQ4f29tAAAEAvo1j1QGZmpjIzM3t7GOgD7Ha7fvWrX3W7HAyg7+P7jROxGd/0u0EAAACcFh4QCgAAYBGKFQAAgEUoVgAAABahWAHfwm9+8xsNHz5cAwYMUGxsrN5///1T5ktLSxUbG6sBAwZoxIgReu65576jkQI4E1u2bFFqaqqcTqdsNptef/31b3wP329IFCugx1599VVlZ2frkUceUWVlpa699lpNmTJFtbW1J8zv27dPN910k6699lpVVlbq4YcfVlZWltavX/8djxzAN2lra9OYMWO0atWq08rz/UYXfhUI9NDYsWN11VVXafXq1ea6UaNGadq0acrLy+uWf+ihh/TGG2+opqbGXHfffffp73//u7Zt2/adjBnAmbPZbCosLNS0adNOmuH7jS7MWAE90N7eroqKCiUlJfmsT0pK0tatW0/4nm3btnXLJycna9euXero6DhrYwVw9vH9RheKFdADn3zyiTo7O7v949thYWHd/pHuLg0NDSfMf/nll/rkk0/O2lgBnH18v9GFYgV8Czabzee1YRjd1n1T/kTrAfQ9fL8hUayAHgkJCZGfn1+32anGxsZu/9XaJTw8/IT5fv366ZJLLjlrYwVw9vH9RheKFdAD/v7+io2NVUlJic/6kpISjR8//oTviY+P75YvLi5WXFyc+vfvf9bGCuDs4/uNLhQroIdycnL0u9/9Ti+99JJqamr0wAMPqLa2Vvfdd58kaf78+brrrrvM/H333aePP/5YOTk5qqmp0UsvvaQXX3xRubm5vfURAJxEa2ur3G633G63pK8ep+B2u83HqfD9xkkZAHrsf/7nf4yhQ4ca/v7+xlVXXWWUlpaa22bOnGkkJCT45Ddv3mz8+Mc/Nvz9/Y1hw4YZq1ev/o5HDOB0vPfee4akbsvMmTMNw+D7jZPjOVYAAAAW4VIgAACARShWAAAAFqFYAQAAWIRiBQAAYBGKFQAAgEUoVgAAABahWAEAAFiEYgUAAGARihWAc9aCBQv0ox/9qLeH8Z3bvHmzbDabmpube3soPs7VcQHnEooVAACARShWAPAd6uzs1LFjx3p7GADOEooVgLNq7d
q1uuSSS+T1en3W/+QnP9Fdd911WvtYt26dhg0bJofDoZ/97Gc6cuSIua2oqEjXXHONLrroIl1yySVKSUnRRx99ZG5vb2/X/fffr4iICA0YMEDDhg1TXl7eaR3XZrNp9erVmjJligICAjR8+HD96U9/Mref6NKY2+2WzWbT/v37JUlr1qzRRRddpLfeekvR0dGy2+36+OOP5fV6NW/ePEVGRsput+vyyy/Xiy++6HP8iooKxcXF6cILL9T48eO1Z88ec9tHH32kW265RWFhYRo0aJCuvvpqvfPOOz7v/81vfqPLL79cAwYMUFhYmG6//XZzm2EYevrppzVixAgFBARozJgx+vOf/+zz/o0bN2rkyJEKCAjQDTfcYH4mACdHsQJwVv30pz9VZ2en3njjDXPdJ598orfeekv/+Z//+Y3v/+ijj/T666/rrbfe0ltvvaXS0lI9+eST5va2tjbl5ORo586d2rRpky644ALdeuut5qzQs88+qzfeeEP/+7//qz179uiVV17RsGHDTnv8jz76qH7yk5/o73//u+688079/Oc/V01NzemfAEmff/658vLy9Lvf/U67d+9WaGio7rrrLhUUFOjZZ59VTU2NnnvuOQ0aNMjnfY888oiWLVumXbt2qV+/frr77rvNba2trbrpppv0zjvvqLKyUsnJyUpNTVVtba0kadeuXcrKytKiRYu0Z88eFRUV6brrrjPf///+3//Tyy+/rNWrV2v37t164IEHdOedd6q0tFSSVFdXp9tuu0033XST3G637rnnHv3Xf/3XGX1u4HvJAICz7Je//KUxZcoU8/UzzzxjjBgxwjh27Ngp3/erX/3KuPDCC42WlhZz3YMPPmiMHTv2pO9pbGw0JBlVVVWGYRjGnDlzjIkTJ37jsU5EknHffff5rBs7dqzxy1/+0jAMw3jvvfcMSUZTU5O5vbKy0pBk7Nu3zzAMw3j55ZcNSYbb7TYze/bsMSQZJSUlJzxu137feecdc92GDRsMScbRo0dPOt7o6Ghj5cqVhmEYxvr1642goCCfc9eltbXVGDBggLF161af9bNmzTJ+/vOfG4ZhGPPnzzdGjRrlc94eeuihbp8XgC9mrACcdRkZGSouLta///1vSdLLL7+s9PR02Wy2b3zvsGHDFBgYaL6OiIhQY2Oj+fqjjz5SWlqaRowYoaCgIA0fPlySzJmb9PR0ud1uRUVFKSsrS8XFxWc09vj4+G6vz3TGyt/fX1deeaX52u12y8/PTwkJCad839ffExERIUnmZ29ra9O8efMUHR2tiy66SIMGDdKHH35ofu7ExEQNHTpUI0aMkMvlUn5+vj7//HNJ0gcffKAvvvhCiYmJGjRokLmsXbvWvIxaU1OjcePG+fxvdPy5ANBdv94eAIDz349//GONGTNGa9euVXJysqqqqvTmm2+e1nv79+/v89pms/nc/J2amqrIyEi98MILcjqdOnbsmGJiYtTe3i5Juuqqq7Rv3z69/fbbeueddzR9+nRNmjSp2/1EZ6KrbFxwwVf/bWoYhrmto6OjWz4gIMCnoAQEBJzWcb7+2bve3/XZH3zwQf3f//2fli5dqssuu0wBAQG6/fbbzc8dGBiov/3tb9q8ebOKi4v12GOPacGCBdq5c6e5jw0bNmjIkCE+x7Tb7d0+E4DTR7EC8J245557tGLFCv373//WpEmTFBkZ+a33+emnn6qmpkbPP/+8rr32WklSWVlZt1xQUJBmzJihGTNm6Pbbb9fkyZP12Wef6eKLL/7GY5SXl/vcZF9eXq4f//jHkqTBgwdLkurr6xUcHCzpq9mobzJ69GgdO3ZMpaWlmjRp0jfmT+T9999Xenq6br31Vklf3XN1/M3l/fr106RJkzRp0iT96le/0kUXXaR3331XiYmJstvtqq2tPemsWXR0tF5//XWfdeXl5T0aK/B9QrEC8J244447lJubqxdeeEFr1661ZJ/BwcG65JJL9Nvf/lYRERGqra3tdo
P1ihUrFBERoR/96Ee64IIL9Kc//Unh4eG66KKLTusYf/rTnxQXF6drrrlG+fn52rFjh/nrvcsuu0yRkZFasGCBHn/8ce3du1fLli37xn0OGzZMM2fO1N13361nn31WY8aM0ccff6zGxkZNnz79tMZ12WWX6bXXXlNqaqpsNpseffRRn5m8t956S//617903XXXKTg4WBs3btSxY8cUFRWlwMBA5ebm6oEHHtCxY8d0zTXXqKWlRVu3btWgQYM0c+ZM3XfffVq2bJlycnJ07733qqKiQmvWrDmtsQHfZ9xjBeA7ERQUpJ/85CcaNGiQpk2bZsk+L7jgAhUUFKiiokIxMTF64IEHtGTJEp/MoEGD9NRTTykuLk5XX3219u/fr40bN5qX8b7JwoULVVBQoCuvvFK///3vlZ+fr+joaElfXar74x//qA8//FBjxozRU089pccff/y09rt69WrdfvvtyszM1A9/+ENlZGSora3ttD/7ihUrFBwcrPHjxys1NVXJycm66qqrzO0XXXSRXnvtNU2cOFGjRo3Sc889pz/+8Y+64oorJEn//d//rccee0x5eXkaNWqUkpOT9eabb5r3qP3gBz/Q+vXr9eabb2rMmDF67rnntHjx4tMeH/B9ZTO4kA7gO5KYmKhRo0bp2Wef7e2hnBabzabCwkLLiiCA8x+XAgGcdZ999pmKi4v17rvvatWqVb09HAA4a7gUCOCsu+qqq3TvvffqqaeeUlRUlLn+iiuu8Pm5/9eX/Pz8szqm/Pz8kx6763IZAJwpLgUC6DUff/zxCR9PIElhYWE+z6+y2pEjR3To0KETbuvfv7+GDh161o4N4PxFsQIAALAIlwIBAAAsQrECAACwCMUKAADAIhQrAAAAi1CsAAAALEKxAgAAsAjFCgAAwCIUKwAAAIv8fxcO5ezCVCpeAAAAAElFTkSuQmCC",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"import seaborn as sns\n",
|
||
"\n",
|
||
"sns.countplot(train_sport, x='y_has_purchased')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "d056c7b3-0e8c-485c-b2f3-4681077f1c2e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['projet-bdc2324-team1/Generalization/sport/Test_set',\n",
|
||
" 'projet-bdc2324-team1/Generalization/sport/Test_set.csv',\n",
|
||
" 'projet-bdc2324-team1/Generalization/sport/Train_set',\n",
|
||
" 'projet-bdc2324-team1/Generalization/sport/Train_set.csv']"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"fs.ls('projet-bdc2324-team1/Generalization/sport')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "6a9963be-e17b-4cb3-a795-35cece44ce97",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Look at y_has_purchased"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"id": "907bb25a-b555-4cfa-bfc9-785120ae4292",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def display_databases(directory_path, file_name, datetime_col = None):\n",
|
||
" \"\"\"\n",
|
||
" This function returns the file from s3 storage \n",
|
||
" \"\"\"\n",
|
||
" file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n",
|
||
" print(\"File path : \", file_path)\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n",
|
||
" return df "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"id": "d3164f81-0ef2-4f12-bc56-b7a999c4a9cd",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"directory_path = '5'\n",
|
||
"# start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)\n",
|
||
"min_date = \"2021-05-01\"\n",
|
||
"end_features_date = \"2022-11-01\"\n",
|
||
"max_date = \"2023-11-01\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"id": "7cb31d80-41ca-4c2b-89b6-ee50486e7298",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
|
||
"df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\",\n",
|
||
" datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
|
||
"df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\",\n",
|
||
" datetime_col = ['purchase_date'])\n",
|
||
"\n",
|
||
"# Filtre de cohérence pour la mise en pratique de notre méthode\n",
|
||
"max_date = pd.to_datetime(max_date, utc = True, format = 'ISO8601') \n",
|
||
"end_features_date = pd.to_datetime(end_features_date, utc = True, format = 'ISO8601')\n",
|
||
"min_date = pd.to_datetime(min_date, utc = True, format = 'ISO8601')\n",
|
||
"\n",
|
||
"df_campaigns_information = df_campaigns_information[(df_campaigns_information['sent_at'] <= end_features_date) & (df_campaigns_information['sent_at'] >= min_date)]\n",
|
||
"df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')\n",
|
||
"\n",
|
||
"#Filtre de la base df_products_purchased_reduced\n",
|
||
"df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]\n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"id": "1d63a61e-22b4-4224-89d4-18444276cfaa",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>opened_at</th>\n",
|
||
" <th>sent_at</th>\n",
|
||
" <th>delivered_at</th>\n",
|
||
" <th>campaign_name</th>\n",
|
||
" <th>campaign_service_id</th>\n",
|
||
" <th>campaign_sent_at</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [id, customer_id, opened_at, sent_at, delivered_at, campaign_name, campaign_service_id, campaign_sent_at]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 61,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_campaigns_information.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"id": "a27a80c1-0be2-4199-96e7-566d568b1f51",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>supplier_name</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>6287839</td>\n",
|
||
" <td>204007</td>\n",
|
||
" <td>545836.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-03-31 03:42:59+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>6287840</td>\n",
|
||
" <td>204007</td>\n",
|
||
" <td>545836.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-03-31 03:42:59+00:00</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6154548</td>\n",
|
||
" <td>227006</td>\n",
|
||
" <td>535225.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-02-28 16:31:29+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>6154549</td>\n",
|
||
" <td>227006</td>\n",
|
||
" <td>535225.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-02-28 16:31:29+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6287843</td>\n",
|
||
" <td>407930</td>\n",
|
||
" <td>545838.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-03-31 04:00:22+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
||
"0 6287839 204007 545836.0 824 fov \n",
|
||
"1 6287840 204007 545836.0 824 fov \n",
|
||
"2 6154548 227006 535225.0 824 fov \n",
|
||
"3 6154549 227006 535225.0 824 fov \n",
|
||
"4 6287843 407930 545838.0 824 fov \n",
|
||
"\n",
|
||
" purchase_date amount is_full_price name_event_types \\\n",
|
||
"0 2022-03-31 03:42:59+00:00 55.0 False match rugby \n",
|
||
"1 2022-03-31 03:42:59+00:00 30.0 False match rugby \n",
|
||
"2 2022-02-28 16:31:29+00:00 55.0 False match rugby \n",
|
||
"3 2022-02-28 16:31:29+00:00 55.0 False match rugby \n",
|
||
"4 2022-03-31 04:00:22+00:00 55.0 False match rugby \n",
|
||
"\n",
|
||
" name_facilities name_categories name_events \\\n",
|
||
"0 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"1 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"2 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"3 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"4 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"\n",
|
||
" name_seasons start_date_time end_date_time \\\n",
|
||
"0 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"1 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"2 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"3 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"4 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"\n",
|
||
" open \n",
|
||
"0 True \n",
|
||
"1 True \n",
|
||
"2 True \n",
|
||
"3 True \n",
|
||
"4 True "
|
||
]
|
||
},
|
||
"execution_count": 62,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_products_purchased_reduced.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"id": "f47357ab-0216-4f70-ab8f-6767819e1cdb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Merge all tables and build the KPIs\n",
|
||
"\n",
|
||
"# KPIs on advertising campaigns\n",
|
||
"df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
|
||
"\n",
|
||
"# KPIs on purchasing behaviour\n",
|
||
"df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
|
||
"\n",
|
||
"# KPIs on socio-demographic data\n",
|
||
"df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"id": "3d08a2f8-3c83-41c7-98f8-4be268ffa0da",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>6009745</td>\n",
|
||
" <td>1372685</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1771</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>6011228</td>\n",
|
||
" <td>1372685</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1771</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6058950</td>\n",
|
||
" <td>1372685</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1771</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>6062404</td>\n",
|
||
" <td>1372685</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1771</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>af</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>250217</td>\n",
|
||
" <td>78785</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>11035.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1771</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 30 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
|
||
"0 6009745 1372685 NaN NaN 0 1771 \n",
|
||
"1 6011228 1372685 NaN NaN 0 1771 \n",
|
||
"2 6058950 1372685 NaN NaN 0 1771 \n",
|
||
"3 6062404 1372685 NaN NaN 0 1771 \n",
|
||
"4 250217 78785 NaN 11035.0 0 1771 \n",
|
||
"\n",
|
||
" is_partner deleted_at gender is_email_true ... first_buying_date \\\n",
|
||
"0 False NaN 2 True ... NaN \n",
|
||
"1 False NaN 2 True ... NaN \n",
|
||
"2 False NaN 2 True ... NaN \n",
|
||
"3 False NaN 2 True ... NaN \n",
|
||
"4 False NaN 0 True ... NaN \n",
|
||
"\n",
|
||
" country gender_label gender_female gender_male gender_other country_fr \\\n",
|
||
"0 af other 0 0 1 0.0 \n",
|
||
"1 af other 0 0 1 0.0 \n",
|
||
"2 af other 0 0 1 0.0 \n",
|
||
"3 af other 0 0 1 0.0 \n",
|
||
"4 fr female 1 0 0 1.0 \n",
|
||
"\n",
|
||
" nb_campaigns nb_campaigns_opened time_to_open \n",
|
||
"0 NaN NaN NaT \n",
|
||
"1 NaN NaN NaT \n",
|
||
"2 NaN NaN NaT \n",
|
||
"3 NaN NaN NaT \n",
|
||
"4 NaN NaN NaT \n",
|
||
"\n",
|
||
"[5 rows x 30 columns]"
|
||
]
|
||
},
|
||
"execution_count": 65,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Left-merge the customer table with the campaign KPIs on customer_id\n",
|
||
"df_customer = pd.merge(df_customerplus_clean, df_campaigns_kpi, on = 'customer_id', how = 'left')\n",
|
||
"df_customer.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"id": "bc3d1aed-b2af-48e5-a920-626f2abc3358",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>160516</td>\n",
|
||
" <td>149.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>4470.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>409.693137</td>\n",
|
||
" <td>66.356979</td>\n",
|
||
" <td>343.336157</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2021-09-17 06:39:19+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>160517</td>\n",
|
||
" <td>1977.0</td>\n",
|
||
" <td>27.0</td>\n",
|
||
" <td>1473.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>431.558519</td>\n",
|
||
" <td>27.733472</td>\n",
|
||
" <td>403.825046</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2021-08-26 09:53:10+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>160518</td>\n",
|
||
" <td>116.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>439.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>427.177720</td>\n",
|
||
" <td>23.689340</td>\n",
|
||
" <td>403.488380</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2021-08-30 19:01:31+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>160519</td>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>608.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>483.642940</td>\n",
|
||
" <td>108.777870</td>\n",
|
||
" <td>374.865069</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2019-05-21 08:03:52+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>160520</td>\n",
|
||
" <td>207.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>431.550012</td>\n",
|
||
" <td>69.310266</td>\n",
|
||
" <td>362.239745</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2019-08-20 15:10:07+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 39 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 160516 149.0 3.0 4470.0 1.0 \n",
|
||
"1 160517 1977.0 27.0 1473.0 2.0 \n",
|
||
"2 160518 116.0 8.0 439.0 2.0 \n",
|
||
"3 160519 34.0 2.0 608.0 1.0 \n",
|
||
"4 160520 207.0 5.0 0.0 1.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 409.693137 66.356979 \n",
|
||
"1 1.0 431.558519 27.733472 \n",
|
||
"2 0.0 427.177720 23.689340 \n",
|
||
"3 0.0 483.642940 108.777870 \n",
|
||
"4 0.0 431.550012 69.310266 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet ... first_buying_date \\\n",
|
||
"0 343.336157 0.0 ... 2021-09-17 06:39:19+00:00 \n",
|
||
"1 403.825046 15.0 ... 2021-08-26 09:53:10+00:00 \n",
|
||
"2 403.488380 0.0 ... 2021-08-30 19:01:31+00:00 \n",
|
||
"3 374.865069 0.0 ... 2019-05-21 08:03:52+00:00 \n",
|
||
"4 362.239745 0.0 ... 2019-08-20 15:10:07+00:00 \n",
|
||
"\n",
|
||
" country gender_label gender_female gender_male gender_other \\\n",
|
||
"0 fr male 0 1 0 \n",
|
||
"1 fr female 1 0 0 \n",
|
||
"2 fr male 0 1 0 \n",
|
||
"3 fr female 1 0 0 \n",
|
||
"4 fr male 0 1 0 \n",
|
||
"\n",
|
||
" country_fr nb_campaigns nb_campaigns_opened time_to_open \n",
|
||
"0 1.0 0.0 0.0 NaT \n",
|
||
"1 1.0 0.0 0.0 NaT \n",
|
||
"2 1.0 0.0 0.0 NaT \n",
|
||
"3 1.0 0.0 0.0 NaT \n",
|
||
"4 1.0 0.0 0.0 NaT \n",
|
||
"\n",
|
||
"[5 rows x 39 columns]"
|
||
]
|
||
},
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_customer[['nb_campaigns', 'nb_campaigns_opened']] = df_customer[['nb_campaigns', 'nb_campaigns_opened']].fillna(0)\n",
|
||
"# Outer-merge with the purchasing-behaviour KPIs on customer_id\n",
|
||
"df_customer_product = pd.merge(df_tickets_kpi, df_customer, on = 'customer_id', how = 'outer')\n",
|
||
"df_customer_product.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"id": "5549e265-3904-464b-964b-518a84a42503",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>supplier_name</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [ticket_id, customer_id, purchase_id, event_type_id, supplier_name, purchase_date, amount, is_full_price, name_event_types, name_facilities, name_categories, name_events, name_seasons, start_date_time, end_date_time, open]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Fill NaN values\n",
|
||
"df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']] = df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']].fillna(0)\n",
|
||
"\n",
|
||
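"# 2. Construction of the explained variable. NOTE(review): this filter returned an empty frame in the saved run - check whether df_products_purchased_reduced is already truncated at end_features_date\n",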
|
||
"df_products_purchased_to_predict = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= max_date) & (df_products_purchased_reduced['purchase_date'] > end_features_date)]\n",
|
||
"df_products_purchased_to_predict.head()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"id": "be182c6c-012f-447d-a57f-03da65da53f7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<DatetimeArray>\n",
|
||
"['2022-03-31 03:42:59+00:00', '2022-02-28 16:31:29+00:00',\n",
|
||
" '2022-03-31 04:00:22+00:00', '2022-03-31 04:09:18+00:00',\n",
|
||
" '2022-03-25 15:50:52+00:00', '2022-08-01 10:05:49+00:00',\n",
|
||
" '2021-08-26 12:17:40+00:00', '2022-08-02 06:32:37+00:00',\n",
|
||
" '2022-06-30 09:16:59+00:00', '2022-07-03 13:53:30+00:00',\n",
|
||
" ...\n",
|
||
" '2022-01-26 11:34:05+00:00', '2022-01-21 17:07:25+00:00',\n",
|
||
" '2022-01-26 13:43:23+00:00', '2022-01-26 14:38:05+00:00',\n",
|
||
" '2022-01-26 14:39:19+00:00', '2022-01-26 14:40:12+00:00',\n",
|
||
" '2022-01-26 14:41:17+00:00', '2022-01-27 08:16:02+00:00',\n",
|
||
" '2022-01-27 08:45:25+00:00', '2022-01-27 11:57:11+00:00']\n",
|
||
"Length: 49543, dtype: datetime64[ns, UTC]"
|
||
]
|
||
},
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_products_purchased_reduced['purchase_date'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"id": "aab1cc7e-79be-403c-b9c1-4f4f333b13ff",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>supplier_name</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>6287839</td>\n",
|
||
" <td>204007</td>\n",
|
||
" <td>545836.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-03-31 03:42:59+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>6287840</td>\n",
|
||
" <td>204007</td>\n",
|
||
" <td>545836.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-03-31 03:42:59+00:00</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>6154548</td>\n",
|
||
" <td>227006</td>\n",
|
||
" <td>535225.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-02-28 16:31:29+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>6154549</td>\n",
|
||
" <td>227006</td>\n",
|
||
" <td>535225.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-02-28 16:31:29+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>6287843</td>\n",
|
||
" <td>407930</td>\n",
|
||
" <td>545838.0</td>\n",
|
||
" <td>824</td>\n",
|
||
" <td>fov</td>\n",
|
||
" <td>2022-03-31 04:00:22+00:00</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>match rugby</td>\n",
|
||
" <td>jean bouin</td>\n",
|
||
" <td>centrale</td>\n",
|
||
" <td>sf paris / racing 92 (ercc)</td>\n",
|
||
" <td>saison 2021 - 2022</td>\n",
|
||
" <td>2022-04-08 22:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
||
"0 6287839 204007 545836.0 824 fov \n",
|
||
"1 6287840 204007 545836.0 824 fov \n",
|
||
"2 6154548 227006 535225.0 824 fov \n",
|
||
"3 6154549 227006 535225.0 824 fov \n",
|
||
"4 6287843 407930 545838.0 824 fov \n",
|
||
"\n",
|
||
" purchase_date amount is_full_price name_event_types \\\n",
|
||
"0 2022-03-31 03:42:59+00:00 55.0 False match rugby \n",
|
||
"1 2022-03-31 03:42:59+00:00 30.0 False match rugby \n",
|
||
"2 2022-02-28 16:31:29+00:00 55.0 False match rugby \n",
|
||
"3 2022-02-28 16:31:29+00:00 55.0 False match rugby \n",
|
||
"4 2022-03-31 04:00:22+00:00 55.0 False match rugby \n",
|
||
"\n",
|
||
" name_facilities name_categories name_events \\\n",
|
||
"0 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"1 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"2 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"3 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"4 jean bouin centrale sf paris / racing 92 (ercc) \n",
|
||
"\n",
|
||
" name_seasons start_date_time end_date_time \\\n",
|
||
"0 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"1 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"2 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"3 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"4 saison 2021 - 2022 2022-04-08 22:00:00+02:00 1901-01-01 00:09:21+00:09 \n",
|
||
"\n",
|
||
" open \n",
|
||
"0 True \n",
|
||
"1 True \n",
|
||
"2 True \n",
|
||
"3 True \n",
|
||
"4 True "
|
||
]
|
||
},
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= max_date)].head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"id": "ce59de67-127e-4b0a-b96c-9684d87792dd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Timestamp('2022-10-31 23:17:26+0000', tz='UTC')"
|
||
]
|
||
},
|
||
"execution_count": 74,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_products_purchased_reduced['purchase_date'].max()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "184463d1-b0dd-44b9-a9a3-4ab32c8c13c1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|