{ "cells": [ { "cell_type": "markdown", "id": "5bf5c226", "metadata": {}, "source": [ "# Business Data Challenge - Team 1" ] }, { "cell_type": "code", "execution_count": 1, "id": "b1a5b9d3", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import s3fs\n", "import re\n", "import warnings" ] }, { "cell_type": "markdown", "id": "ecfa2219", "metadata": {}, "source": [ "Configuration de l'accès aux données" ] }, { "cell_type": "code", "execution_count": 2, "id": "1a094277", "metadata": {}, "outputs": [], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" ] }, { "cell_type": "code", "execution_count": 3, "id": "30d77451-2df6-4c07-8b15-66e0e990ff03", "metadata": {}, "outputs": [], "source": [ "# Import cleaning and merge functions\n", "\n", "exec(open('0_Cleaning_and_merge_functions.py').read())\n", "\n", "exec(open('0_KPI_functions.py').read())\n", "\n", "# Ignore warning\n", "warnings.filterwarnings('ignore')\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39", "metadata": {}, "outputs": [], "source": [ "def load_dataset_2(directory_path, file_name):\n", " \"\"\"\n", " This function loads csv file\n", " \"\"\"\n", " file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n", " with fs.open(file_path, mode=\"rb\") as file_in:\n", " df = pd.read_csv(file_in, sep=\",\")\n", "\n", " # drop na :\n", " #df = df.dropna(axis=1, thresh=len(df))\n", " # if identifier in table : delete it\n", " if 'identifier' in df.columns:\n", " df = df.drop(columns = 'identifier')\n", " return df" ] }, { "cell_type": "code", "execution_count": 5, "id": "31ab76f0-fbb1-46f6-b359-97228620c207", "metadata": {}, "outputs": [], "source": [ "def export_in_temporary(df, output_name):\n", " print('Export of dataset :', output_name)\n", " FILE_PATH_OUT_S3 = \"ajoubrel-ensae/Temporary\" + \"/\" + output_name + '.csv'\n", " with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", " df.to_csv(file_out, index = False)" ] }, { "cell_type": "markdown", "id": "ccf597b0-b459-4ea5-baf0-5ba8c90915e4", "metadata": {}, "source": [ "# Cleaning target area and tags" ] }, { "cell_type": "code", "execution_count": 14, "id": "fd88e294-e038-4cec-ad94-2bbbc10a4059", "metadata": {}, "outputs": [], "source": [ "def concatenate_names(names):\n", " return ', '.join(names)\n", "\n", "def targets_KPI(df_target = None):\n", " \n", " df_target['target_name'] = df_target['target_name'].fillna('').str.lower()\n", "\n", " # Target name cotegory musees / \n", " df_target['target_jeune'] = df_target['target_name'].str.contains('|'.join(['jeune', 'pass_culture', 'etudiant', '12-25 ans', 'student', 'jeunesse']), case=False).astype(int)\n", " df_target['target_optin'] = df_target['target_name'].str.contains('|'.join(['optin' ,'opt-in']), case=False).astype(int)\n", " df_target['target_optout'] = df_target['target_name'].str.contains('|'.join(['optout', 'unsubscribed']), case=False).astype(int)\n", " df_target['target_scolaire'] = df_target['target_name'].str.contains('|'.join(['scolaire' , 'enseignant', 'chercheur', 'schulen', 'école']), case=False).astype(int)\n", " df_target['target_entreprise'] = df_target['target_name'].str.contains('|'.join(['b2b', 'btob', 'cse']), case=False).astype(int)\n", " df_target['target_famille'] = df_target['target_name'].str.contains('|'.join(['famille', 'enfants', 'family']), case=False).astype(int)\n", " df_target['target_newsletter'] = df_target['target_name'].str.contains('|'.join(['nl', 'newsletter']), case=False).astype(int)\n", " \n", " # Target name category for sport compagnies\n", " df_target['target_abonne'] = ((\n", " df_target['target_name']\n", " .str.contains('|'.join(['abo', 'adh']), case=False)\n", " & ~df_target['target_name'].str.contains('|'.join(['hors abo', 'anciens abo']), case=False)\n", " ).astype(int))\n", " \n", " df_target_categorie = df_target.groupby('customer_id')[['target_jeune', 'target_optin', 'target_optout', 'target_scolaire', 'target_entreprise', 'target_famille', 'target_newsletter', 'target_abonne']].max()\n", " \n", " target_agg = df_target.groupby('customer_id').agg(\n", " nb_targets=('target_name', 'nunique') # Utilisation de tuples pour spécifier les noms de colonnes\n", " # all_targets=('target_name', concatenate_names),\n", " # all_target_types=('target_type_name', concatenate_names)\n", " ).reset_index()\n", "\n", " target_agg['nb_targets'] = (target_agg['nb_targets'] - (target_agg['nb_targets'].mean())) / (target_agg['nb_targets'].std())\n", " \n", " target_agg = pd.merge(target_agg, df_target_categorie, how='left', on='customer_id')\n", " \n", " return target_agg" ] }, { "cell_type": "code", "execution_count": 15, "id": "1b124018-9637-463e-b512-15743ec9480b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idnb_targetstarget_jeunetarget_optintarget_optouttarget_scolairetarget_entreprisetarget_familletarget_newslettertarget_abonne
01605166.93826401001001
116051710.35738701100001
21605185.22870301100001
31605196.08348301100101
41605202.94928801000001
.................................
4712056405875-0.75476200100000
4712066405905-0.46983500100000
4712076405909-0.75476200100000
4712086405917-0.75476200100000
4712096405963-0.75476200100000
\n", "

471210 rows × 10 columns

\n", "
" ], "text/plain": [ " customer_id nb_targets target_jeune target_optin target_optout \\\n", "0 160516 6.938264 0 1 0 \n", "1 160517 10.357387 0 1 1 \n", "2 160518 5.228703 0 1 1 \n", "3 160519 6.083483 0 1 1 \n", "4 160520 2.949288 0 1 0 \n", "... ... ... ... ... ... \n", "471205 6405875 -0.754762 0 0 1 \n", "471206 6405905 -0.469835 0 0 1 \n", "471207 6405909 -0.754762 0 0 1 \n", "471208 6405917 -0.754762 0 0 1 \n", "471209 6405963 -0.754762 0 0 1 \n", "\n", " target_scolaire target_entreprise target_famille target_newsletter \\\n", "0 0 1 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 1 0 \n", "4 0 0 0 0 \n", "... ... ... ... ... \n", "471205 0 0 0 0 \n", "471206 0 0 0 0 \n", "471207 0 0 0 0 \n", "471208 0 0 0 0 \n", "471209 0 0 0 0 \n", "\n", " target_abonne \n", "0 1 \n", "1 1 \n", "2 1 \n", "3 1 \n", "4 1 \n", "... ... \n", "471205 0 \n", "471206 0 \n", "471207 0 \n", "471208 0 \n", "471209 0 \n", "\n", "[471210 rows x 10 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "targets_KPI(display_input_databases('5', file_name = \"target_information\"))" ] }, { "cell_type": "code", "execution_count": null, "id": "c75efea3-b5e8-4a7a-bed4-dd64ae9ff9f2", "metadata": {}, "outputs": [], "source": [ "#export_inv_temporary(target_agg, 'Target_kpi_concatenate')" ] }, { "cell_type": "code", "execution_count": 34, "id": "9d224485-3472-4cc7-9825-1a643bc94fef", "metadata": {}, "outputs": [], "source": [ "nb_compagnie = ['1', '2']\n", "\n", "def load_files(nb_compagnie):\n", " targets = pd.DataFrame()\n", " \n", " # début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle\n", " for directory_path in nb_compagnie:\n", " df_customerplus_clean_0 = display_input_databases(directory_path, file_name = \"customerplus_cleaned\")\n", " df_target_information = display_input_databases(directory_path, file_name = \"target_information\")\n", " \n", " df_target_KPI = targets_KPI(df_target = df_target_information)\n", " df_target_KPI = pd.merge(df_customerplus_clean_0[['customer_id']], df_target_KPI, how = 'left', on = 'customer_id')\n", "\n", " targets_columns = list(df_target_KPI.columns)\n", " targets_columns.remove('customer_id')\n", " df_target_KPI[targets_columns] = df_target_KPI[targets_columns].fillna(0)\n", " \n", " # creation de la colonne Number compagnie, qui permettra d'agréger les résultats\n", " df_target_KPI[\"number_company\"]=int(directory_path)\n", " \n", " # Traitement des index\n", " df_target_KPI[\"customer_id\"]= directory_path + '_' + df_target_KPI['customer_id'].astype('str')\n", " \n", " # Concaténation\n", " targets = pd.concat([targets, df_target_KPI], ignore_index=True)\n", " \n", " return targets" ] }, { "cell_type": "code", "execution_count": 50, "id": "3c911274-0ebd-49af-9487-26524ba20e74", "metadata": {}, "outputs": [], "source": [ "companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n", " 'sport': ['5', '6', '7', '8', '9'],\n", " 'musique' : ['10', '11', '12', '13', '14']}\n", "\n", "def target_description(targets):\n", "\n", " describe_target = targets.groupby('number_company').agg(\n", " prop_target_jeune=('target_jeune', lambda x: (x.sum() / x.count())*100),\n", " prop_target_optin=('target_optin', lambda x: (x.sum() / x.count())*100),\n", " prop_target_optout=('target_optout', lambda x: (x.sum() / x.count())*100),\n", " prop_target_scolaire=('target_scolaire', lambda x: (x.sum() / x.count())*100),\n", " prop_target_entreprise=('target_entreprise', lambda x: (x.sum() / x.count())*100),\n", " prop_target_famille=('target_famille', lambda x: (x.sum() / x.count())*100),\n", " prop_target_newsletter=('target_newsletter', lambda x: (x.sum() / x.count())*100))\n", "\n", " plot = describe_target[['prop_target_jeune', 'prop_target_scolaire', 'prop_target_entreprise', 'prop_target_famille']].plot.bar()\n", " \n", " return plot" ] }, { "cell_type": "code", "execution_count": 51, "id": "af62ecef-9120-4107-af3e-512588a96800", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGrCAYAAADqwWxuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCzklEQVR4nO3dfXzPdf////vbMJvNpslmGm++mzDkXKyyDs2SnB4VITmUkMmsDIfRZk5qbPZBIR0xR+cdpUPD4ayILWEoh5ORiIpWRzJnOdlevz9c9v5573y85/0at+vlssvF6/l6vl7Px/u9zfu+5+vMYhiGIQAAABOp5OwCAAAA8iOgAAAA0yGgAAAA0yGgAAAA0yGgAAAA0yGgAAAA0yGgAAAA06ns7AKuR25urn7++Wd5enrKYrE4uxwAAFAKhmHozJkz8vf3V6VKxc+RVMiA8vPPPysgIMDZZQAAgOtw/Phx3XXXXcX2qZABxdPTU9LVF1ijRg0nVwMAAEojOztbAQEBts/x4lTIgJJ3WKdGjRoEFAAAKpjSnJ7BSbIAAMB0CCgAAMB0CCgAAMB0KuQ5KABwM+Tk5Ojy5cvOLgOoUKpWrVriJcSlQUABgHwMw9DJkyf1xx9/OLsUoMKpVKmSGjRooKpVq97QfggoAJBPXjipXbu23N3duSEkUEp5N1I9ceKE6tWrd0O/OwQUALhGTk6OLZz4+Pg4uxygwrnzzjv1888/68qVK6pSpcp176fMB4m+/PJL9ejRQ/7+/rJYLPr000/t1huGodjYWPn7+8vNzU2hoaHau3evXZ+LFy9q9OjRqlWrlqpXr66ePXvqxx9/vO4XAQCOknfOibu7u5MrASqmvEM7OTk5N7SfMgeUc+fO6Z577tH8+fMLXZ+QkKCkpCTNnz9f27dvl5+fn8LCwnTmzBlbn8jISC1fvlzvv/++tmzZorNnz+rRRx+94RcDAI7CYR3g+jjqd6fMh3i6deumbt26FbrOMAwlJydr0qRJ6tu3ryQpJSVFvr6+evfddzV8+HCdPn1a//jHP/TPf/5TDz30kCTp7bffVkBAgNavX6/w8PAC+7148aIuXrxoW87Ozi5r2QAAoAJx6H1Qjhw5opMnT6pr1662NldXV3Xu3Fnp6emSpIyMDF2+fNmuj7+/v5o1a2brk9/MmTPl5eVl++JBgQAA3NocepLsyZMnJUm+vr527b6+vvrhhx9sfapWraqaNWsW6JO3fX4TJ05UVFSUbTnvYUMAcDNZJ6y8qeMdfaX7TR0PRVu6dKkiIyO59PwmKpc7yeY//mQYRonHpIrr4+rqanswIA8IBICKpbALKpytrDX169dPBw8eLL+CUIBDA4qfn58kFZgJycrKss2q+Pn56dKlSzp16lSRfQAA5e/SpUvOLqFMnFmvm5ubateu7bTxb0cODSgNGjSQn5+f1q1bZ2u7dOmSNm3apE6dOkmS2rRpoypVqtj1OXHihP773//a+gAAyi40NFQRERGKiIiQt7e3fHx8FBMTI8MwJElWq1XTpk3TkCFD5OXlpWHDhkmSPv74YwUHB8vV1VVWq1WJiYl2+7VarYqPj9eAAQPk4eEhf39/zZs3r1Q1Wa1WSVKfPn1ksVhsy4cPH1avXr3k6+srDw8PtWvXTuvXry+wbWH1Ll68WAEBAXJ3d1efPn2UlJQkb29vu20/++wztWnTRtWqVVPDhg0VFxenK1euFFtTcZYuXVqmMY4ePSqLxaLdu3fb+v/xxx+yWCzauHGjJGnjxo2yWCzasGGD2rZtK3d3d3Xq1EmZmZmlHudWVuZzUM6ePavvvvvOtnzkyBHt3r1bd9xxh+rVq6fIyEjNmDFDQUFBCgoK0owZM+Tu7q4BAwZIkry8vPTMM8/oxRdflI+Pj+644w699NJLat68ue2qHhTtZh8DvxbHwwHzS0lJ0TPPPKOvv/5aO3bs0HPPPaf69evbPtxnzZqlyZMnKyYmRtLVCxeeeOIJxcbGql+/fkpPT9fzzz8vHx8fDRkyxLbfWbNm6e9//7tiY2O1Zs0ajR07Vo0bN1ZYWFix9Wzfvl21a9fWkiVL9PDDD8vFxUXS1c+SRx55RNOmTVO1atWUkpKiHj16KDMzU/Xq1bMb99p609LSNGLECL366qvq2bOn1q9fr8mTJ9uNuWbNGg0aNEhz587V/fffr8OHD+u5556TJL388stF1lQWJY1RFpMmTVJiYqLuvPNOjRgxQkOHDlVaWprDx6loLEZetC6ljRs36sEHHyzQ/vTTT2vp0qUyDENxcXFatGiRTp06pQ4dOui1115Ts2bNbH3//PNPjRs3Tu+++64uXLigLl266PXXXy/1ia/Z2dny8vLS6dOnb7vzUQgoQPn6888/deTIETVo0EDVqlWzW2f2k2RDQ0OVlZWlvXv32s7pmzBhglasWKF9+/bJarWqVatWWr58uW2bgQMH6tdff9XatWttbdHR0Vq5cqXtJptWq1VNmjTR6tWrbX369++v7OxsrVq1qsS6LBaLli9frt69exfbLzg4WCNHjlRERIRt3Pz19u/fX2fPnlVqaqqtbdCgQUpNTbWdwPrAAw+oW7dumjhxoq3P22+/rejoaP38889lqilP/pNkSxrj6NGjatCggXbt2qWWLVtKujqDUrNmTX3xxRcKDQ21fZ6uX79eXbp0kSStWrVK3bt314ULF1StWrVSvRazKe53qCyf32WeQQkNDVVxmcZisSg2NlaxsbFF9qlWrZrmzZtX6ilCAEDp3HvvvXYXHHTs2FGJiYm2G2G2bdvWrv/+/fvVq1cvu7aQkBAlJycrJyfHNrvQsWNHuz4dO3ZUcnLyddd57tw5xcXFKTU11XZb9AsXLujYsWN2/fLXm5mZqT59+ti1tW/f3i6wZGRkaPv27Zo+fbqtLScnR3/++afOnz/vkLsElzRGWbRo0cL27zp16ki6el5mvXr1bsprMSuexQMAt5Hq1avbLRd2BWVpJ9Zv5I6h48aN05o1azR79mwFBgbKzc1Njz32WIETYa+n3tzcXMXFxdluGHqt/H/RX6+SxqhUqVKB2vIeo5Dftc+ryXttubm5pRrnVkZAAYBbyNatWwssBwUFFXmeRdOmTbVlyxa7tvT0dDVq1Mhum8L227hx41LVVKVKlQKPMtm8ebOGDBlimw05e/asjh49WuK+GjdurG3bttm17dixw265devWyszMVGBgYJlqKouSxrjzzjslXb0IpFWrVpJkd8Kso8a5lRFQAOAWcvz4cUVFRWn48OHauXOn5s2bV+CqnGu9+OKLateuneLj49WvXz999dVXmj9/vl5//XW7fmlpaUpISFDv3r21bt06ffTRR1q5snTn5FitVm3YsEEhISFydXVVzZo1FRgYqE8++UQ9evSQxWLR5MmTbbMGxRk9erQeeOABJSUlqUePHvr888+1evVqu1mVKVOm6NFHH1VAQIAef/xxVapUSd9++6327NmjadOmFVlTWZQ0hpubm+6991698sorslqt+u2332wn+jpynFsZAQUASqkinCg+ePBgXbhwQe3bt5eLi4tGjx5tu+qjMK1bt9aHH36oKVOmKD4+XnXq1NHUqVPtruCRrgaZjIwMxcXFydPTU4mJiYU+O60wiYmJioqK0uLFi1W3bl0dPXpUc+bM0dChQ9WpUyfVqlVL48ePL9Vz1kJCQrRw4ULFxcUpJiZG4eHhGjt2rN0DbMPDw5WamqqpU6cqISFBVapUUePGjfXss88WW1NZlGaMt956S0OHDlXbtm119913KyEhwe4xL44a51ZV5qt4zICreJyjIvznDNyo4q5AMLvQ0FC1bNnyhk5eLYzValVkZKQiIyMdul9HGTZsmA4cOKDNmzeX2xiLFi1SfHy8fvzxx3Ib41bhtKt4AABwptmzZyssLEzVq1fX6tWrlZKSUuCQlCMdP35cq1atUnBwcLmNgYIIKACA6/bOO+9o+PDhha6rX7++7V4qjrRt2zYlJCTozJkzatiwoebOnXvDhzyCg4NtD7XN7+LFiwoODtbSpUtvaAyUDQEFAG4RebdQd7Tizs/o2bOnOnToUOi6ay+fdaQPP/zQ4ftctWpVkZcB+/r6ytPT0+FjongEFADAdfP09LwlPrzr16/v7BKQj0MfFggAAOAIBBQAAGA6BBQAAGA6BBQAAGA6BBQAAGA6XMUDAKUV63WTxzt9c8eDU2zcuFEPPvigTp06JW9v71JtU153DTYTZlAAAOXKYrHo008/dXYZdsxYU1l88sknio+Pd3YZ5YqAAgC3qUuXLjm7hDKpaPWWpzvuuKPY+8/cCu8VAQUAbhGhoaGKiIhQRESEvL295ePjo5iYGOU9E9ZqtWratGkaMmSIvLy8NGzYMEnSxx9/rODgYLm6uspqtSoxMdFuv1arVfHx8RowYIA8PDzk7++vefPmlaomq9UqSerTp48sFott+fDhw+rVq5d8fX3l4eGhdu3aaf369QW2LazexYsXKyAgQO7u7urTp4+SkpIKHBr57LPP1KZNG1WrVk0NGzZUXFycrly5UmxNxfnmm2/04IMPytPTUzVq1FCbNm20Y8cO2/q0tDR17txZ7u7uqlmzpsLDw3Xq1ClJV2+V/8ILL6h27dqqVq2a7rvvPm3fvr3Isf73v//pySef1F133SV3d3c1b95c7733nl2f0NBQu4c3FvVepaen64EHHpCbm5sCAgL0wgsv6Ny5cyW+XjMgoADALSQlJUWVK1fW119/rblz52rOnDl68803betnzZqlZs2aKSMjQ5MnT1ZGRoaeeOIJ9e/fX3v27FFsbKwmT55c4Lkzs2bNUosWLbRz505NnDhRY8eO1bp160qsJ++DeMmSJTpx4oRt+ezZs3rkkUe0fv167dq1S+Hh4erRo4eOHTtWYNxr601LS9OIESM0ZswY7d69W2FhYZo+fbrdNmvWrNGgQYP0wgsvaN++fVq0aJGWLl1q61dUTcUZOHCg7rrrLm3fvl0ZGRmaMGGC7Vb+u3fvVpcuXRQcHKyvvvpKW7ZsUY8ePZSTkyNJio6O1scff6yUlBTt3LlTgYGBCg8P1++//17oWH/++afatGmj1NRU/fe//9Vzzz2np556Sl9//XWxNeZ/r/bs2aPw8HD17dtX3377rT744ANt2bJFERERJb5eM7AYedG6AinL45pvNdYJK5029tFXujttbOBmKe5R8WY/STY0NFRZWVnau3evLBaLJGnChAlasWKF9u3bJ6vVqlatWmn58uW2bQYOHKhff/1Va9eutbVFR0dr5cqVtgf9Wa1WNWnSRKtXr7b16d+/v7Kzs7Vq1aoS67JYLFq+fLl69+5dbL/g4GCNHDnS9gFaWL39+/fX2bNnlZqaamsbNGiQUlNT9ccff0iSHnjgAXXr1k0TJ0609Xn77bcVHR2tn3/+uUw15alRo4bmzZunp59+usC6AQMG6NixY9qyZUuBdefOnVPNmjW1dOlSDRgwQJJ0+fJlWa1WRUZGaty4caU6SbZ79+5q0qSJZs+eLangSbKFvVeDBw+Wm5ubFi1aZGvbsmWLOnfurHPnzhX8+XaQ4n6HyvL5zQwKANxC7r33Xls4kaSOHTvq0KFDtr/m27Zta9d///79CgkJsWsLCQmx2yZvP9fq2LGj9u/ff911njt3TtHR0WratKm8vb3l4eGhAwcOFJhByV9vZmam2rdvb9eWfzkjI0NTp06Vh4eH7WvYsGE6ceKEzp8/f131RkVF6dlnn9VDDz2kV155RYcPH7aty5tBKczhw4d1+fJlu/e4SpUqat++fZHvX05OjqZPn64WLVrIx8dHHh4eWrt2bYH3Jr/871VGRoaWLl1q9z6Eh4crNzdXR44cKe1LdxouMwaA20j16tXtlg3DsAs0eW2lkX+7shg3bpzWrFmj2bNnKzAwUG5ubnrssccKnNx5PfXm5uYqLi5Offv2LTDu9c4axMbGasCAAVq5cqVWr16tl19+We+//7769OkjNze3IrfLq62wmot6/xITEzVnzhwlJyerefPmql69uiIjI0s88TX/e5Wbm6vhw4frhRdeKNC3Xr16xe7LDAgoAHAL2bp1a4HloKAgubi4FNq/adOmBQ5NpKenq1GjRnbbFLbfxo0bl6qmKlWq2M3GSNLmzZs1ZMgQ9enTR9LVc1KOHj1a4r4aN26sbdu22bVde7KqJLVu3VqZmZkKDAwsU00ladSokRo1aqSxY8fqySef1JIlS9SnTx+1aNFCGzZsUFxcXIFtAgMDVbVqVW3ZssXuEM+OHTvsTnK91ubNm9WrVy8NGjRI0tWgcejQITVp0qRM9bZu3Vp79+4t9n0wMw7xAMAt5Pjx44qKilJmZqbee+89zZs3T2PGjCmy/4svvqgNGzYoPj5eBw8eVEpKiubPn6+XXnrJrl9aWpoSEhJ08OBBvfbaa/roo4+K3e+1rFarNmzYoJMnT9qubAkMDNQnn3yi3bt365tvvtGAAQOUm5tb4r5Gjx6tVatWKSkpSYcOHdKiRYu0evVqu9mIKVOmaNmyZYqNjdXevXu1f/9+ffDBB4qJiSm2pqJcuHBBERER2rhxo3744QelpaVp+/bttsAwceJEbd++Xc8//7y+/fZbHThwQAsWLNBvv/2m6tWra+TIkRo3bpz+85//aN++fRo2bJjOnz+vZ555ptDxAgMDtW7dOqWnp2v//v0aPny4Tp48WeJ7k9/48eP11VdfadSoUdq9e7cOHTqkFStWaPTo0WXelzMwgwIApVUB7uw6ePBgXbhwQe3bt5eLi4tGjx6t5557rsj+rVu31ocffqgpU6YoPj5ederU0dSpUzVkyBC7fi+++KIyMjIUFxcnT09PJSYmKjw8vFQ1JSYmKioqSosXL1bdunV19OhRzZkzR0OHDlWnTp1Uq1YtjR8/XtnZ2SXuKyQkRAsXLlRcXJxiYmIUHh6usWPHav78+bY+4eHhSk1N1dSpU5WQkKAqVaqocePGevbZZ4utqSguLi763//+p8GDB+uXX35RrVq11LdvX9uMSaNGjbR27Vr9/e9/V/v27eXm5qYOHTroySeflCS98sorys3N1VNPPaUzZ86obdu2WrNmjWrWrFnoeJMnT9aRI0cUHh4ud3d3Pffcc+rdu7dOny7bz1+LFi20adMmTZo0Sffff78Mw9D/+3//T/369SvTfpyFq3gqGK7iAcpXsVfxmFx53f4874qTog5JONuwYcN04MABbd682dmlQI67iocZFABAhTJ79myFhYWpevXqWr16tVJSUvT66687uyw4GOegAACu2zvvvGN3Geu1X8HBweUy5rZt2xQWFqbmzZtr4cKFmjt3rt3hm+sRHBxc5Ot45513HFQ5yoIZFAC4RWzcuLFc9lvc+Rk9e/ZUhw4dCl2Xd6dVR/vwww8dvs9Vq1bp8uXLha7z9fV1+HgoGQEFAHDdPD09i31oXUVRv359Z5eAfDjEAwAATIeAAgAATIeAAgAATIeAAgAATIeAAgAATIereACglJqnNL+p4+15es9NHQ+3h6NHj6pBgwbatWuXWrZs6exyisQMCgCgXFksFn366afOLsOOM2rauHGjLBaL/vjjj5s6bn4BAQE6ceKEmjVr5tQ6SkJAAYDb1KVLl5xdQplUtHqvV3m+zkuXLsnFxUV+fn6qXNncB1EIKABwiwgNDVVERIQiIiLk7e0tHx8fxcTEKO+ZsFarVdOmTdOQIUPk5eWlYcOGSZI+/vhjBQcHy9XVVVarVYmJiXb7tVqtio+P14ABA+Th4SF/f3/NmzevVDVZrVZJUp8+fWSxWGzLhw8fVq9eveTr6ysPDw+1a9dO69evL7BtYfUuXrxYAQEBcnd3V58+fZSUlCRvb2+7bT/77DO1adNG1apVU8OGDRUXF6crV64UW1NJitundHVW5s0331SfPn3k7u6uoKAgrVixQtLVwyoPPvigJKlmzZqyWCy2J0bnfd+ioqJUq1YthYWFSZL27dunRx55RB4eHvL19dVTTz2l3377zTZeSd/vot7Do0ePymKxaPfu3ZKkU6dOaeDAgbrzzjvl5uamoKAgLVmyxLaPn376Sf369VPNmjXl4+OjXr16FXt3YUchoADALSQlJUWVK1fW119/rblz52rOnDl68803betnzZqlZs2aKSMjQ5MnT1ZGRoaeeOIJ9e/fX3v27FFsbKwmT56spUuX2u131qxZatGihXbu3KmJEydq7NixWrduXYn1bN++XZK0ZMkSnThxwrZ89uxZPfLII1q/fr127dql8PBw9ejRQ8eOHSsw7rX1pqWlacSIERozZox2796tsLAwTZ8+3W6bNWvWaNCgQXrhhRe0b98+LVq0SEuXLrX1K6qm4pS0zzxxcXF64okn9O233+qRRx7RwIED9fvvvysgIEAff/yxJCkzM1MnTpzQ//3f/9m2y/u+paWladGiRTpx4oQ6d+6sli1baseOHfrPf/6jX375RU888YTdeCV9vwt7D/ObPHmy9u3bp9WrV2v//v1asGCBatWqJUk6f/68HnzwQXl4eOjLL7/Uli1b5OHhoYcffrjcZ7QsxrVRq4Ioy+OabzXWCSudNvbRV7o7bWzgZinuUfFmP0k2NDRUWVlZ2rt3rywWiyRpwoQJWrFihfbt2yer1apWrVpp+fLltm0GDhyoX3/9VWvXrrW1RUdHa+XKldq7d6+kq3+FN2nSRKtXr7b16d+/v7Kzs7Vq1aoS67JYLFq+fLl69+5dbL/g4GCNHDlSERERtnHz19u/f3+dPXtWqamptrZBgwYpNTXVdm7HAw88oG7dumnixIm2Pm+//baio6P1888/l6mmPKXdZ0xMjOLj4yVJ586dk6enp1atWqWHH35YGzdu1IMPPqhTp07ZzfiEhobq9OnT2rVrl61typQp+vrrr7VmzRpb248//qiAgABlZmaqUaNGJX6/i3oP858k27NnT9WqVUtvvfVWgdf91ltvKSEhQfv377eNcenSJXl7e+vTTz9V165dC2xT3O9QWT6/mUEBgFvIvffea/sgkaSOHTvq0KFDysnJkSS1bdvWrv/+/fsVEhJi1xYSEmK3Td5+rtWxY0ft37//uus8d+6coqOj1bRpU3l7e8vDw0MHDhwoMIOSv97MzEy1b9/eri3/ckZGhqZOnWr3ROJhw4bpxIkTOn/+/HXVW9p9tmjRwvbv6tWry9PTU1lZWSXuP//rzMjI0BdffGE3XuPGjSVdPTyWp6Tvd2H7zm/kyJF6//331bJlS0VHRys9Pd2uju+++06enp62Ou644w79+eefdnWUB3OfIQMAcKjq1avbLRuGYfcBl9dWGvm3K4tx48ZpzZo1mj17tgIDA+Xm5qbHHnuswGGD66k3NzdXcXFx6tu3b4Fx8/9FX1ql3Wf+JzhbLBbl5uaWuP/8rzM3N1c9evTQq6++WqBvnTp1Slt2ofvOr1u3bvrhhx+0cuVKrV+/Xl26dNGoUaM0e/Zs5ebmqk2bNnrnnXcKbHfnnXeWqY6yIqAAwC1k69atBZaDgoLk4uJSaP+mTZtqy5Ytdm3p6elq1KiR3TaF7TfvL/qSVKlSxe4veknavHmzhgwZoj59+ki6ek5KaU68bNy4sbZt22bXtmPHDrvl1q1bKzMzU4GBgWWqqTil2WdJqlatKkmlGrd169b6+OOPZbVai73apqzf76LceeedGjJkiIYMGaL7779f48aN0+zZs9W6dWt98MEHql279k0/pYJDPABwCzl+/LiioqKUmZmp9957T/PmzdOYMWOK7P/iiy9qw4YNio+P18GDB5WSkqL58+frpZdesuuXlpamhIQEHTx4UK+99po++uijYvd7LavVqg0bNujkyZM6deqUJCkwMFCffPKJdu/erW+++UYDBgwo1UzD6NGjtWrVKiUlJenQoUNatGiRVq9ebTerMmXKFC1btkyxsbHau3ev9u/frw8++EAxMTHF1lSc0uyzJPXr15fFYlFqaqp+/fVXnT17tsi+o0aN0u+//64nn3xS27Zt0/fff6+1a9dq6NChdgGnrN/vol7bv//9b3333Xfau3evUlNT1aRJE0lXz1GqVauWevXqpc2bN+vIkSPatGmTxowZox9//LFM45QVMygAUEoV4c6ugwcP1oULF9S+fXu5uLho9OjReu6554rs37p1a3344YeaMmWK4uPjVadOHU2dOtV2CWyeF198URkZGYqLi5Onp6cSExMVHh5eqpoSExMVFRWlxYsXq27dujp69KjmzJmjoUOHqlOnTqpVq5bGjx+v7OzsEvcVEhKihQsXKi4uTjExMQoPD9fYsWM1f/58W5/w8HClpqZq6tSpSkhIUJUqVdS4cWM9++yzxdZUnNLssyR169ZVXFycJkyYoL/97W8aPHhwgaul8vj7+ystLU3jx49XeHi4Ll68qPr16+vhhx9WpUr//9xCWb/fhalataomTpyoo0ePys3NTffff7/ef/99SZK7u7u+/PJLjR8/Xn379tWZM2dUt25ddenSpdxnVLiKp4LhKh6gfBV3BYLZhYaGqmXLlkpOTnbofq1WqyIjIxUZGenQ/TrKsGHDdODAAW3evNnZpdxU5fX9vlGOuoqHGRQAQIUye/ZshYWFqXr16lq9erVSUlL0+uuvO7ssOBjnoAAArts777xjdynstV/BwcHlMua2bdsUFham5s2ba+HChZo7d26ZDrUUJjg4uMjXUdgVLCh/zKAAwC1i48aN5bLf4s7P6Nmzpzp06FDouvyX3DrKhx9+6PB9rlq1SpcvXy50na+vr8PHc4Ty+n6bBQEFAHDdPD095enp6ewyblj9+vWdXQLy4RAPAAAwHQIKAAAwHQIKAAAwHQIKAAAwHQIKAAAwHa7iAYBS2t+4yU0dr8mB/Td1PBTvjTfeUHx8vH766SclJSWV2511ly5dqsjISP3xxx+SpNjYWH366afavXu3JGnIkCH6448/9Omnn5bL+GbBDAoAoFxZLBbTfZiWtabs7GxFRERo/Pjx+umnn8r8vJuy6Nevnw4ePFhu+68omEEBgNvUpUuXVLVqVWeXUWrOrPfYsWO6fPmyunfvrjp16pTrWG5ubnJzcyvXMSoCZlAA4BYRGhqqiIgIRUREyNvbWz4+PoqJiVHeM2GtVqumTZumIUOGyMvLS8OGDZMkffzxxwoODparq6usVqsSExPt9mu1WhUfH68BAwbIw8ND/v7+mjdvXqlqslqtkqQ+ffrIYrHYlg8fPqxevXrJ19dXHh4eateundavX19g28LqXbx4sQICAuTu7q4+ffooKSlJ3t7edtt+9tlnatOmjapVq6aGDRsqLi5OV65cKbamoixdulTNmzeXJDVs2FAWi0VHjx4t02sYPHiwPDw8VL9+ff373//Wr7/+ql69esnDw0PNmzfXjh077MbL/3qKYxiGEhIS1LBhQ7m5uemee+7Rv/71r1Jvb1YODyhXrlxRTEyMGjRoIDc3NzVs2FBTp05Vbm6urY9hGIqNjZW/v7/c3NwUGhqqvXv3OroUALjtpKSkqHLlyvr66681d+5czZkzR2+++aZt/axZs9SsWTNlZGRo8uTJysjI0BNPPKH+/ftrz549io2N1eTJk7V06VK7/c6aNUstWrTQzp07NXHiRI0dO1br1q0rsZ7t27dLkpYsWaITJ07Yls+ePatHHnlE69ev165duxQeHq4ePXro2LFjBca9tt60tDSNGDFCY8aM0e7duxUWFqbp06fbbbNmzRoNGjRIL7zwgvbt26dFixZp6dKltn5F1VSUfv362YLHtm3bdOLECQUEBJT6NcyZM0chISHatWuXunfvrqeeekqDBw/WoEGDtHPnTgUGBmrw4MG2IFlWMTExWrJkiRYsWKC9e/dq7NixGjRokDZt2nRd+zMLhx/iefXVV7Vw4UKlpKQoODhYO3bs0N/+9jd5eXlpzJgxkqSEhAQlJSVp6dKlatSokaZNm6awsDBlZmbeErdMBgBnCQgI0Jw5c2SxWHT33Xdrz549mjNnjm324S9/+YteeuklW/+BAweqS5cumjx5siSpUaNG2rdvn2bNmqUhQ4bY+oWEhGjChAm2PmlpaZozZ47CwsKKrefOO++UJHl7e8vPz8/Wfs899+iee+6xLU+bNk3Lly/XihUrFBERYWvPX29MTIy6detma2vUqJHS09OVmppq6zN9+nRNmDBBTz/9tKSrsx7x8fGKjo7Wyy+/XGRNRXFzc5OPj4/t9eRtU9rX8Mgjj2j48OGSpClTpmjBggVq166dHn/8cUnS+PHj1bFjR/3yyy+lquda586dU1JSkj7//HN17NjR9nq3bNmiRYsWqXPnzmXan5k4fAblq6++Uq9evdS9e3dZrVY99thj6tq1q236yjAMJScna9KkSerbt6+aNWumlJQUnT9/Xu+++66jywGA28q9994ri8ViW+7YsaMOHTqknJwcSVLbtm3t+u/fv18hISF2bSEhIXbb5O3nWh07dtT+/dd/ldG5c+cUHR2tpk2bytvbWx4eHjpw4ECB2Yf89WZmZqp9+/Z2bfmXMzIyNHXqVLsnEg8bNkwnTpzQ+fPnr7vm630NLVq0sP0778GDeYeMrm3Lysoqcw379u3Tn3/+qbCwMLvXu2zZMh0+fPh6XpZpOHwG5b777tPChQt18OBBNWrUSN988422bNmi5ORkSdKRI0d08uRJde3a1baNq6urOnfurPT0dFvKvNbFixd18eJF23J2drajywaA20L16tXtlg3DsAs0eW2lkX+7shg3bpzWrFmj2bNnKzAwUG5ubnrsscd06dKlG643NzdXcXFx6tu3b4Fxq1Wrdt0151fa13DtU53zai+s7dpTIUorb5uVK1eqbt26dutcXV3LvD8zcXhAGT9+vE6fPq3GjRvLxcVFOTk5mj59up588klJ0smTJyUVfHy1r6+vfvjhh0L3OXPmTMXFxTm6VAC45WzdurXAclBQkFxcXArt37RpU23ZssWuLT09XY0aNbLbprD9Nm7cuFQ1ValSxW42RpI2b96sIUOGqE+fPpKunpNy9OjREvfVuHFjbdu2za7t2hNMJal169bKzMxUYGBgmWoqq+t9DY7UtGlTubq66tixYxX6cE5hHB5QPvjgA7399tt69913FRwcrN27dysyMlL+/v6244FSweRdWCrOM3HiREVFRdmWs7OzFRAQ4OjSAaDCO378uKKiojR8+HDt3LlT8+bNK3BVzrVefPFFtWvXTvHx8erXr5+++uorzZ8/X6+//rpdv7S0NCUkJKh3795at26dPvroI61cubJUNVmtVm3YsEEhISFydXVVzZo1FRgYqE8++UQ9evSQxWLR5MmTSzWDMHr0aD3wwANKSkpSjx499Pnnn2v16tV2nx9TpkzRo48+qoCAAD3++OOqVKmSvv32W+3Zs0fTpk0rsqayut7X4Eienp566aWXNHbsWOXm5uq+++5Tdna20tPT5eHhYfe5W9E4PKCMGzdOEyZMUP/+/SVdPc72ww8/aObMmXr66adtJwCdPHnS7lryrKysArMqeVxdXSv8VBWAiq8i3Nl18ODBunDhgtq3by8XFxeNHj262JuKtW7dWh9++KGmTJmi+Ph41alTR1OnTrU7QVa6GmQyMjIUFxcnT09PJSYmKjw8vFQ1JSYmKioqSosXL1bdunV19OhRzZkzR0OHDlWnTp1Uq1YtjR8/vlSH70NCQrRw4ULFxcUpJiZG4eHhGjt2rObPn2/rEx4ertTUVE2dOlUJCQmqUqWKGjdurGeffbbYmsrqel+Do8XHx6t27dqaOXOmvv/+e3l7e6t169b6+9//ftNrcSSLcb3XNRXBx8dH06ZN08iRI21tM2fO1JIlS3Tw4EEZhiF/f3+NHTtW0dHRkq7efKd27dp69dVXCz0HJb/s7Gx5eXnp9OnTqlGjhiPLNz3rhNL9xVIejr7S3WljAzfLn3/+qSNHjqhBgwYOPV/hZggNDVXLli1t5/w5itVqVWRkZLnd2v1GDRs2TAcOHNDmzZudXQpU/O9QWT6/HT6D0qNHD02fPl316tVTcHCwdu3apaSkJA0dOlTS1UM7kZGRmjFjhoKCghQUFKQZM2bI3d1dAwYMcHQ5AIBbzOzZsxUWFqbq1atr9erVSklJKXBIChWfwwPKvHnzNHnyZD3//PPKysqSv7+/hg8frilTptj6REdH68KFC3r++ed16tQpdejQQWvXruUeKABQwbzzzjtFznzXr1+/XG7CuW3bNiUkJOjMmTNq2LCh5s6da3f45noEBwcXeaHGokWLNHDgwBvaP8rO4Yd4bgYO8TgHh3hwO6jIh3ic4cyZM/rll18KXVelShXVr1//Jld0fX744Qddvny50HW+vr78AV0Gpj3EAwC4fXh6et4SH94VJUjdTnhYIAAUogJOLgOm4KjfHQIKAFwj7w6fjrwlOnA7ybuTblE3BywtDvEAwDVcXFzk7e1tey6Ku7v7Dd3SHbid5Obm6tdff5W7u7sqV76xiEFAAYB88m4oeT0PbwNud5UqVVK9evVuONgTUAAgH4vFojp16qh27dpFXtkBoHBVq1ZVpUo3fgYJAQUAiuDi4nLDx9EBXB9OkgUAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZDQAEAAKZTLgHlp59+0qBBg+Tj4yN3d3e1bNlSGRkZtvWGYSg2Nlb+/v5yc3NTaGio9u7dWx6lAACACsjhAeXUqVMKCQlRlSpVtHr1au3bt0+JiYny9va29UlISFBSUpLmz5+v7du3y8/PT2FhYTpz5oyjywEAABVQZUfv8NVXX1VAQICWLFlia7NarbZ/G4ah5ORkTZo0SX379pUkpaSkyNfXV++++66GDx/u6JIAAEAF4/AZlBUrVqht27Z6/PHHVbt2bbVq1UqLFy+2rT9y5IhOnjyprl272tpcXV3VuXNnpaenF7rPixcvKjs72+4LAADcuhweUL7//nstWLBAQUFBWrNmjUaMGKEXXnhBy5YtkySdPHlSkuTr62u3na+vr21dfjNnzpSXl5ftKyAgwNFlAwAAE3F4QMnNzVXr1q01Y8YMtWrVSsOHD9ewYcO0YMECu34Wi8Vu2TCMAm15Jk6cqNOnT9u+jh8/7uiyAQCAiTg8oNSpU0dNmza1a2vSpImOHTsmSfLz85OkArMlWVlZBWZV8ri6uqpGjRp2XwAA4Nbl8IASEhKizMxMu7aDBw+qfv36kqQGDRrIz89P69ats62/dOmSNm3apE6dOjm6HAAAUAE5/CqesWPHqlOnTpoxY4aeeOIJbdu2TW+88YbeeOMNSVcP7URGRmrGjBkKCgpSUFCQZsyYIXd3dw0YMMDR5QAAgArI4QGlXbt2Wr58uSZOnKipU6eqQYMGSk5O1sCBA219oqOjdeHCBT3//PM6deqUOnTooLVr18rT09PR5QAAgArIYhiG4ewiyio7O1teXl46ffr0bXc+inXCSqeNffSV7k4bGwBQ8ZXl85tn8QAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMhoAAAANMp94Ayc+ZMWSwWRUZG2toMw1BsbKz8/f3l5uam0NBQ7d27t7xLAQAAFUS5BpTt27frjTfeUIsWLezaExISlJSUpPnz52v79u3y8/NTWFiYzpw5U57lAACACqLcAsrZs2c1cOBALV68WDVr1rS1G4ah5ORkTZo0SX379lWzZs2UkpKi8+fP69133y10XxcvXlR2drbdFwAAuHWVW0AZNWqUunfvroceesiu/ciRIzp58qS6du1qa3N1dVXnzp2Vnp5e6L5mzpwpLy8v21dAQEB5lQ0AAEygXALK+++/r507d2rmzJkF1p08eVKS5Ovra9fu6+trW5ffxIkTdfr0advX8ePHHV80AAAwjcqO3uHx48c1ZswYrV27VtWqVSuyn8VisVs2DKNAWx5XV1e5uro6tE4AAGBeDp9BycjIUFZWltq0aaPKlSurcuXK2rRpk+bOnavKlSvbZk7yz5ZkZWUVmFUBAAC3J4cHlC5dumjPnj3avXu37att27YaOHCgdu/erYYNG8rPz0/r1q2zbXPp0iVt2rRJnTp1cnQ5AACgAnL4IR5PT081a9bMrq169ery8fGxtUdGRmrGjBkKCgpSUFCQZsyYIXd3dw0YMMDR5QAAgArI4QGlNKKjo3XhwgU9//zzOnXqlDp06KC1a9fK09PTGeUAAACTsRiGYTi7iLLKzs6Wl5eXTp8+rRo1aji7nJvKOmGl08Y++kp3p40NAKj4yvL5zbN4AACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6RBQAACA6Tg8oMycOVPt2rWTp6enateurd69eyszM9Ouj2EYio2Nlb+/v9zc3BQaGqq9e/c6uhQAAFBBOTygbNq0SaNGjdLWrVu1bt06XblyRV27dtW5c+dsfRISEpSUlKT58+dr+/bt8vPzU1hYmM6cOePocgAAQAVU2dE7/M9//mO3vGTJEtWuXVsZGRl64IEHZBiGkpOTNWnSJPXt21eSlJKSIl9fX7377rsaPnx4gX1evHhRFy9etC1nZ2c7umwAAGAi5X4OyunTpyVJd9xxhyTpyJEjOnnypLp27Wrr4+rqqs6dOys9Pb3QfcycOVNeXl62r4CAgPIuGwAAOFG5BhTDMBQVFaX77rtPzZo1kySdPHlSkuTr62vX19fX17Yuv4kTJ+r06dO2r+PHj5dn2QAAwMkcfojnWhEREfr222+1ZcuWAussFovdsmEYBdryuLq6ytXVtVxqBAAA5lNuMyijR4/WihUr9MUXX+iuu+6ytfv5+UlSgdmSrKysArMqAADg9uTwgGIYhiIiIvTJJ5/o888/V4MGDezWN2jQQH5+flq3bp2t7dKlS9q0aZM6derk6HIAAEAF5PBDPKNGjdK7776rf//73/L09LTNlHh5ecnNzU0Wi0WRkZGaMWOGgoKCFBQUpBkzZsjd3V0DBgxwdDkAAKACcnhAWbBggSQpNDTUrn3JkiUaMmSIJCk6OloXLlzQ888/r1OnTqlDhw5au3atPD09HV0OAACogBweUAzDKLGPxWJRbGysYmNjHT08AAC4BfAsHgAAYDoEFAAAYDrleh8UwFGapzR32th7nt7jtLEB4HbFDAoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdbtSG0ov1ct7YDeo5b2wAwE3HDAoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADAdAgoAADCdys4uADC7/Y2bOG3sJgf2O21sAHAmZlAAAIDpEFAAAIDpEFAAAIDpEFAAAIDpEFAAAIDpEFAAAIDpEFAAAIDpEFAAAIDpOPVGba+//rpmzZqlEydOKDg4WMnJybr//vudWRIA3J5ivZw49mnnjQ3TctoMygcffKDIyEhNmjRJu3bt0v33369u3brp2LFjzioJAACYhNMCSlJSkp555hk9++yzatKkiZKTkxUQEKAFCxY4qyQAAGASTjnEc+nSJWVkZGjChAl27V27dlV6enqB/hcvXtTFixdty6dPX50OzM7OLt9CTSj34nmnjZ1tMZw2ds6FHKeNfTbHeWPfjj/jcJKLzvv9Fj/nt428/9MMo+SfN6cElN9++005OTny9fW1a/f19dXJkycL9J85c6bi4uIKtAcEBJRbjSjIiUeoJTnvoXntnTayJC/nvuvATfEKP+e3mzNnzsirhP/fnHqSrMVisVs2DKNAmyRNnDhRUVFRtuXc3Fz9/vvv8vHxKbQ/bi3Z2dkKCAjQ8ePHVaNGDWeXA8CB+P2+vRiGoTNnzsjf37/Evk4JKLVq1ZKLi0uB2ZKsrKwCsyqS5OrqKldXV7s2b2/v8iwRJlSjRg3+AwNuUfx+3z5KmjnJ45STZKtWrao2bdpo3bp1du3r1q1Tp06dnFESAAAwEacd4omKitJTTz2ltm3bqmPHjnrjjTd07NgxjRgxwlklAQAAk3BaQOnXr5/+97//aerUqTpx4oSaNWumVatWqX79+s4qCSbl6uqql19+ucBhPgAVH7/fKIrFKM21PgAAADcRz+IBAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0ABAACmQ0BBhXP8+HENHTrU2WUAuA4XLlzQli1btG/fvgLr/vzzTy1btswJVcGMuA8KKpxvvvlGrVu3Vk5OjrNLAVAGBw8eVNeuXXXs2DFZLBbdf//9eu+991SnTh1J0i+//CJ/f39+tyHJyU8zBgqzYsWKYtd///33N6kSAI40fvx4NW/eXDt27NAff/yhqKgohYSEaOPGjapXr56zy4PJMIMC06lUqZIsFouK+9G0WCz8lQVUML6+vlq/fr2aN29uaxs1apRSU1P1xRdfqHr16sygwIZzUGA6derU0ccff6zc3NxCv3bu3OnsEgFchwsXLqhyZfuJ+9dee009e/ZU586ddfDgQSdVBjMioMB02rRpU2wIKWl2BYA5NW7cWDt27CjQPm/ePPXq1Us9e/Z0QlUwKwIKTGfcuHHq1KlTkesDAwP1xRdf3MSKADhCnz599N577xW6bv78+XryySf54wM2nIMCAABMhxkUAABgOgQUAABgOgQUAABgOgQUAABgOgQUAIUKDQ1VZGSks8sAcJsioAAAANMhoAC4aXJycpSbm+vsMgBUAAQUwORCQ0P1wgsvKDo6WnfccYf8/PwUGxsrSTp69KgsFot2795t6//HH3/IYrFo48aNkqSNGzfKYrFozZo1atWqldzc3PSXv/xFWVlZWr16tZo0aaIaNWroySef1Pnz5+3GvnLliiIiIuTt7S0fHx/FxMTY3Ujr0qVLio6OVt26dVW9enV16NDBNq4kLV26VN7e3kpNTVXTpk3l6uqqH374ocTX/NZbbyk4OFiurq6qU6eOIiIibOuOHTumXr16ycPDQzVq1NATTzyhX375xbY+NjZWLVu21FtvvaV69erJw8NDI0eOVE5OjhISEuTn56fatWtr+vTpdmNaLBYtWLBA3bp1k5ubmxo0aKCPPvrIrs/48ePVqFEjubu7q2HDhpo8ebIuX75cYOx//vOfslqt8vLyUv/+/XXmzBlJ0rJly+Tj46OLFy/a7fevf/2rBg8eXOL7AtxWDACm1rlzZ6NGjRpGbGyscfDgQSMlJcWwWCzG2rVrjSNHjhiSjF27dtn6nzp1ypBkfPHFF4ZhGMYXX3xhSDLuvfdeY8uWLcbOnTuNwMBAo3PnzkbXrl2NnTt3Gl9++aXh4+NjvPLKK3bjenh4GGPGjDEOHDhgvP3224a7u7vxxhtv2PoMGDDA6NSpk/Hll18a3333nTFr1izD1dXVOHjwoGEYhrFkyRKjSpUqRqdOnYy0tDTjwIEDxtmzZ4t9va+//rpRrVo1Izk52cjMzDS2bdtmzJkzxzAMw8jNzTVatWpl3HfffcaOHTuMrVu3Gq1btzY6d+5s2/7ll182PDw8jMcee8zYu3evsWLFCqNq1apGeHi4MXr0aOPAgQPGW2+9ZUgyvvrqK9t2kgwfHx9j8eLFRmZmphETE2O4uLgY+/bts/WJj4830tLSjCNHjhgrVqwwfH19jVdffbXA2H379jX27NljfPnll4afn5/x97//3TAMwzh//rzh5eVlfPjhh7Ztfv31V6Nq1arG559/XsJPAnB7IaAAJte5c2fjvvvus2tr166dMX78+DIFlPXr19v6zJw505BkHD582NY2fPhwIzw83G7cJk2aGLm5uba28ePHG02aNDEMwzC+++47w2KxGD/99JNdbV26dDEmTpxoGMbVgCLJ2L17d6lfr7+/vzFp0qRC161du9ZwcXExjh07Zmvbu3evIcnYtm2bYRhXQ4K7u7uRnZ1t6xMeHm5YrVYjJyfH1nb33XcbM2fOtC1LMkaMGGE3XocOHYyRI0cWWWtCQoLRpk0b23JhY48bN87o0KGDbXnkyJFGt27dbMvJyclGw4YN7d5nAIZRufB5FQBm0qJFC7vlOnXqKCsr67r34evraztMcW3btm3b7La59957ZbFYbMsdO3ZUYmKicnJytHPnThmGoUaNGtltc/HiRfn4+NiWq1atWqD+omRlZennn39Wly5dCl2/f/9+BQQEKCAgwNbWtGlTeXt7a//+/WrXrp0kyWq1ytPT0+61ubi4qFKlSnZt+d/Djh07Fli+9vDZv/71LyUnJ+u7777T2bNndeXKFdWoUcNum/xj5/9eDRs2TO3atdNPP/2kunXrasmSJRoyZIjd+wxAIqAAFUCVKlXsli0Wi3Jzc20fuMY154Vce05EUfuwWCxF7rO0cnNz5eLiooyMDLm4uNit8/DwsP3bzc2t1B++bm5uxa43DKPQfeVvL+y1Xe/rzdvv1q1b1b9/f8XFxSk8PFxeXl56//33lZiYaNe/pHFatWqle+65R8uWLVN4eLj27Nmjzz77rMQ6gNsNAQWowO68805J0okTJ9SqVStJsvuL/0Zt3bq1wHJQUJBcXFzUqlUr5eTkKCsrS/fff79DxvP09JTVatWGDRv04IMPFljftGlTHTt2TMePH7fNouzbt0+nT59WkyZNbnj8rVu32p2sunXrVtv7mpaWpvr162vSpEm29aU54bcwzz77rObMmaOffvpJDz30kN2MEICrCChABebm5qZ7771Xr7zyiqxWq3777TfFxMQ4bP/Hjx9XVFSUhg8frp07d2revHm2GYNGjRpp4MCBGjx4sBITE9WqVSv99ttv+vzzz9W8eXM98sgj1zVmbGysRowYodq1a6tbt246c+aM0tLSNHr0aD300ENq0aKFBg4cqOTkZF25ckXPP/+8OnfurLZt297w6/3oo4/Utm1b3XfffXrnnXe0bds2/eMf/5AkBQYG6tixY3r//ffVrl07rVy5UsuXL7+ucQYOHKiXXnpJixcv1rJly264buBWxGXGQAX31ltv6fLly2rbtq3GjBmjadOmOWzfgwcP1oULF9S+fXuNGjVKo0eP1nPPPWdbv2TJEg0ePFgvvvii7r77bvXs2VNff/31Dc0IPP3000pOTtbrr7+u4OBgPfroozp06JCkq4dLPv30U9WsWVMPPPCAHnroITVs2FAffPDBDb9WSYqLi9P777+vFi1aKCUlRe+8846aNm0qSerVq5fGjh2riIgItWzZUunp6Zo8efJ1jVOjRg399a9/lYeHh3r37u2Q2oFbjcW49uA1ANymLBaLli9fftMCQ1hYmJo0aaK5c+felPGAioZDPABwE/3+++9au3atPv/8c82fP9/Z5QCmRUABcFNde4VPfqtXr3bYCbdm1bp1a506dUqvvvqq7r77bmeXA5gWh3gA3FTfffddkevq1q1b4qXGAG4PBBQAAGA6XMUDAABMh4ACAABMh4ACAABMh4ACAABMh4ACAABMh4ACAABMh4ACAABM5/8D7w9aXckH0jYAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "target_description(targets)" ] }, { "cell_type": "markdown", "id": "5d91263e-8a97-4cb1-8d94-db8ab0b77cdf", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Brouillon" ] }, { "cell_type": "code", "execution_count": null, "id": "c5e864b1-adad-4267-b956-3f7ef371d677", "metadata": {}, "outputs": [], "source": [ "\n", "def display_covering_time(df, company, datecover):\n", " \"\"\"\n", " This function draws the time coverage of each company\n", " \"\"\"\n", " min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n", " max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n", " datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n", " print(f'Couverture Company {company} : {min_date} - {max_date}')\n", " return datecover\n", "\n", "\n", "def compute_time_intersection(datecover):\n", " \"\"\"\n", " This function returns the time coverage for all companies\n", " \"\"\"\n", " timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n", " intersection = set.intersection(*timestamps_sets)\n", " intersection_list = list(intersection)\n", " formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n", " return sorted(formated_dates)\n", "\n", "\n", "def df_coverage_modelization(sport, coverage_features = 0.7):\n", " \"\"\"\n", " This function returns start_date, end_of_features and final dates\n", " that help to construct train and test datasets\n", " \"\"\"\n", " datecover = {}\n", " for company in sport:\n", " df_products_purchased_reduced = display_input_databases(company, file_name = \"products_purchased_reduced\",\n", " datetime_col = ['purchase_date'])\n", " datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n", " #print(datecover.keys())\n", " dt_coverage = compute_time_intersection(datecover)\n", " start_date = dt_coverage[0]\n", " end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n", " final_date = dt_coverage[-1]\n", " return start_date, end_of_features, final_date\n", " " ] }, { "cell_type": "markdown", "id": "2435097a-95a5-43e1-84d0-7f6b701441ba", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Bases non communes : mise à plat" ] }, { "cell_type": "code", "execution_count": null, "id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384", "metadata": {}, "outputs": [], "source": [ "companies = {'musee' : ['1', '2', '3', '4'],\n", " 'sport': ['5', '6', '7', '8', '9'],\n", " 'musique' : ['10', '11', '12', '13', '14']}\n", "\n", "all_companies = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']" ] }, { "cell_type": "code", "execution_count": null, "id": "35ac004f-c191-4f45-a4b1-6d993d9ec38c", "metadata": {}, "outputs": [], "source": [ "companies_databases = pd.DataFrame()\n", "\n", "for i in all_companies:\n", " company_databases = pd.DataFrame({'company_number' : [i]})\n", "\n", " BUCKET = \"bdc2324-data/\"+i\n", " for base in fs.ls(BUCKET):\n", " match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n", " if match:\n", " nom_base = match.group(3)\n", " company_databases[nom_base] = 1\n", "\n", " companies_databases = pd.concat([companies_databases, company_databases])" ] }, { "cell_type": "code", "execution_count": null, "id": "8986e477-e6c5-4d6c-83b2-2c90c134b599", "metadata": {}, "outputs": [], "source": [ "pd.set_option(\"display.max_columns\", None)\n", "companies_databases\n" ] }, { "cell_type": "code", "execution_count": null, "id": "8fecc3bb-4c03-4144-97c5-615224d9729e", "metadata": {}, "outputs": [], "source": [ "pd.reset_option(\"display.max_columns\")" ] }, { "cell_type": "markdown", "id": "0294ce71-840e-458b-8ffa-cadabbc6da21", "metadata": {}, "source": [ "# Debut Travail 25/02" ] }, { "cell_type": "markdown", "id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1", "metadata": {}, "source": [ "## Base communes au types Musée" ] }, { "cell_type": "code", "execution_count": null, "id": "5080f66e-f779-410a-876d-b4fe2795e17e", "metadata": {}, "outputs": [], "source": [ "for i in companies['musique']:\n", " BUCKET = \"bdc2324-data/\"+i\n", " liste_base = []\n", " for base in fs.ls(BUCKET):\n", " match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n", " if match:\n", " nom_base = match.group(3)\n", " liste_base.append(nom_base)\n", " globals()['base_'+i] = liste_base\n" ] }, { "cell_type": "code", "execution_count": null, "id": "abd477e1-7479-4c88-a5aa-f987af3f5b79", "metadata": {}, "outputs": [], "source": [ "# Trouver l'intersection entre les cinq listes\n", "intersection = set(base_1).intersection(base_2, base_3, base_4, base_101)\n", "\n", "# Convertir le résultat en liste si nécessaire\n", "intersection_liste = list(intersection)\n", "\n", "print(intersection_liste)" ] }, { "cell_type": "code", "execution_count": null, "id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e", "metadata": {}, "outputs": [], "source": [ "# Trouver l'intersection entre les cinq listes\n", "intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n", "\n", "# Convertir le résultat en liste si nécessaire\n", "intersection_liste = list(intersection)\n", "\n", "print(intersection_liste)" ] }, { "cell_type": "code", "execution_count": null, "id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a", "metadata": {}, "outputs": [], "source": [ "len(intersection_liste)" ] }, { "cell_type": "code", "execution_count": null, "id": "7d058b21-a538-4f59-aefb-ef7966f73fdc", "metadata": {}, "outputs": [], "source": [ "df1_tags = load_dataset_2(\"1\", \"tags\")" ] }, { "cell_type": "code", "execution_count": null, "id": "aa441f99-733c-4675-8676-bed4682d3324", "metadata": {}, "outputs": [], "source": [ "df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')" ] }, { "cell_type": "code", "execution_count": null, "id": "6767a750-14a4-4c05-903e-d2f07170825b", "metadata": {}, "outputs": [], "source": [ "df1_customersplus = load_dataset_2(\"1\", \"customersplus\")" ] }, { "cell_type": "code", "execution_count": null, "id": "125e9145-a815-46fd-bdf4-07589508b259", "metadata": {}, "outputs": [], "source": [ "df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)" ] }, { "cell_type": "code", "execution_count": null, "id": "c17a6976-792f-474d-bcff-c89396eddb3f", "metadata": {}, "outputs": [], "source": [ "df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])" ] }, { "cell_type": "code", "execution_count": null, "id": "ecfc155a-cb42-46ec-8da5-33fdcd087355", "metadata": {}, "outputs": [], "source": [ "len(df1_structure_tag_mappings)" ] }, { "cell_type": "code", "execution_count": null, "id": "071410b8-950d-4fcc-b2b9-57415253c286", "metadata": {}, "outputs": [], "source": [ "df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)" ] }, { "cell_type": "code", "execution_count": null, "id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc", "metadata": {}, "outputs": [], "source": [ "?np.sort_values()" ] }, { "cell_type": "code", "execution_count": null, "id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3", "metadata": {}, "outputs": [], "source": [ "def info_colonnes_dataframe(df):\n", " # Créer une liste pour stocker les informations sur chaque colonne\n", " infos_colonnes = []\n", "\n", " # Parcourir les colonnes du DataFrame\n", " for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n", " # Calculer le taux de valeurs manquantes\n", " taux_na = serie.isna().mean() * 100\n", "\n", " # Ajouter les informations à la liste\n", " infos_colonnes.append({\n", " 'Nom_colonne': nom_colonne,\n", " 'Type_colonne': str(serie.dtype),\n", " 'Taux_NA': taux_na\n", " })\n", "\n", " # Créer une nouvelle DataFrame à partir de la liste d'informations\n", " df_infos_colonnes = pd.DataFrame(infos_colonnes)\n", "\n", " return df_infos_colonnes" ] }, { "cell_type": "code", "execution_count": null, "id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8", "metadata": { "scrolled": true }, "outputs": [], "source": [ "info_colonnes_dataframe(df1_tags)" ] }, { "cell_type": "code", "execution_count": null, "id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980", "metadata": {}, "outputs": [], "source": [ "info_colonnes_dataframe(df1_structure_tag_mappings)" ] }, { "cell_type": "code", "execution_count": null, "id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361", "metadata": {}, "outputs": [], "source": [ "pd.set_option('display.max_colwidth', None)\n", "\n", "print(df1_tags['name'])" ] }, { "cell_type": "code", "execution_count": null, "id": "a59bf932-5b54-4600-81f5-c55ac93ae510", "metadata": {}, "outputs": [], "source": [ "pd.set_option('display.max_rows', None)" ] }, { "cell_type": "code", "execution_count": null, "id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print(df1_tags['name'])" ] }, { "cell_type": "markdown", "id": "76bffba1-5f7e-4308-9224-437ca66148f8", "metadata": {}, "source": [ "## KPI sur target_type" ] }, { "cell_type": "code", "execution_count": null, "id": "f6daf22e-6583-4431-a467-660a1dd4e5a4", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "d91d5895", "metadata": {}, "outputs": [], "source": [ "pd.set_option('display.max_colwidth', None)\n" ] }, { "cell_type": "markdown", "id": "c58b17d3", "metadata": {}, "source": [ "Raisonnement : on prends les target_type qui représente 90% des clients et on fait des catégories dessus." ] }, { "cell_type": "code", "execution_count": null, "id": "6930bff5", "metadata": {}, "outputs": [], "source": [ "def print_main_target(tenant_id, nb_print = 40):\n", " df_target = display_input_databases(tenant_id, \"target_information\")\n", "\n", " print('Nombre de ciblage : ', len(df_target))\n", " nb_customers = df_target['customer_id'].nunique()\n", " print('Nombre de client avec étiquette target : ', nb_customers) \n", "\n", " nb_custumers_per_target = df_target.groupby(\"target_name\")['customer_id'].count().reset_index().sort_values('customer_id', ascending=False)\n", " nb_custumers_per_target['cumulative_customers'] = nb_custumers_per_target['customer_id'].cumsum()/len(df_target)\n", " nb_custumers_per_target['customer_id'] = nb_custumers_per_target['customer_id']/nb_customers\n", "\n", " return nb_custumers_per_target.head(nb_print)" ] }, { "cell_type": "code", "execution_count": null, "id": "1e7ee1a0", "metadata": { "scrolled": true }, "outputs": [], "source": [ "pd.set_option(\"max_colwidth\", None)\n", "print_main_target('1', 60)" ] }, { "cell_type": "code", "execution_count": null, "id": "19f3a2dd-ba3d-4dec-8e10-fed544ab6a53", "metadata": {}, "outputs": [], "source": [ "pd.reset_option('display.max_rows')" ] }, { "cell_type": "code", "execution_count": null, "id": "b57a28ac", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('2', 25)" ] }, { "cell_type": "code", "execution_count": null, "id": "9a65991f", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('3', 70)" ] }, { "cell_type": "code", "execution_count": null, "id": "5f34b8bf", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('4', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "52b24d66-92ad-4421-a62b-5cba837f1893", "metadata": {}, "outputs": [], "source": [ "pd.set_option('display.max_rows', None)" ] }, { "cell_type": "code", "execution_count": null, "id": "40fe3676", "metadata": { "scrolled": true }, "outputs": [], "source": [ "\n", "\n", "print_main_target('5', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "820d3600-379b-4245-a977-f1f1fa1f1839", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('6', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "86f64a1b-763a-4e43-9601-a38c80392d47", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('7', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "fbf2ea42-515a-4cdf-a4c1-50f99c379ed9", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('8', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "9684045c-4e25-4952-b099-a559baa5d749", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('9', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "cf8f7816-e7f3-4b7a-a987-8350a76eb140", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('10', 100)" ] }, { "cell_type": "code", "execution_count": null, "id": "76c818a5-3c52-4d97-ac81-b7f3f89092bd", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('11', 100)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "603b11e4-5d76-4699-a1b2-e795929edc04", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('12', 100)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "fa93aecd-d117-481e-8507-15e49937ce14", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('13', 100)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "a115ebcf-4488-47f3-9d7e-75a1fca52f0f", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print_main_target('14', 100)\n" ] }, { "cell_type": "markdown", "id": "605cced5-052f-4a99-ac26-020c5d2ab633", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "## KPI sur tags" ] }, { "cell_type": "code", "execution_count": null, "id": "916c3e2b-04d3-4877-b894-8f26f10d926e", "metadata": {}, "outputs": [], "source": [ "customersplus = load_dataset_2(\"4\", \"customersplus\")[['id', 'structure_id']]" ] }, { "cell_type": "code", "execution_count": null, "id": "46847b24-15a4-464e-969f-f16ed3653f1f", "metadata": {}, "outputs": [], "source": [ "structure_tag_mappings = load_dataset_2('4', \"structure_tag_mappings\")[['structure_id', 'tag_id']]" ] }, { "cell_type": "code", "execution_count": null, "id": "3c10c69d-735f-453e-96bf-750697d965d0", "metadata": {}, "outputs": [], "source": [ "customersplus[customersplus['structure_id'].notna()]['structure_id'].nunique()" ] }, { "cell_type": "code", "execution_count": null, "id": "9b0e77b3-5f16-4484-9564-7d3826583418", "metadata": {}, "outputs": [], "source": [ "len(customersplus[customersplus['structure_id'].notna()])" ] }, { "cell_type": "code", "execution_count": null, "id": "dfa27722-37f9-435a-8221-8aa6f9a4a107", "metadata": {}, "outputs": [], "source": [ "structure_tag_mappings['structure_id'].nunique()" ] }, { "cell_type": "code", "execution_count": null, "id": "2daabdd5-31e3-4918-9856-9bbc30cde602", "metadata": {}, "outputs": [], "source": [ "def tags_information(tenant_id, first_tags):\n", "\n", " customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']]\n", " customersplus.rename(columns = {'id' : 'customer_id'}, inplace = True)\n", " tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']]\n", " tags.rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'}, inplace = True)\n", " structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n", " \n", " customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n", " customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n", " \n", " nb_customers_with_tag = customer_tags['customer_id'].nunique()\n", " \n", " print('Nombre de client avec tag : ', nb_customers_with_tag)\n", " print('Proportion de clients avec tags : ', nb_customers_with_tag/len(customersplus))\n", " print('Moyenne de tags par client : ', len(customer_tags)/nb_customers_with_tag)\n", " \n", " info = customer_tags.groupby(['tag_id', 'tag_name'])['customer_id'].count().reset_index().sort_values('customer_id', ascending = False).head(first_tags)\n", "\n", " return info" ] }, { "cell_type": "code", "execution_count": null, "id": "0b9f5f71-a927-4cc8-bb0c-9538e28d3553", "metadata": {}, "outputs": [], "source": [ "tags_information(\"1\", 20)" ] }, { "cell_type": "code", "execution_count": null, "id": "bd5bef41-1774-4601-86b5-b7c1aea8f1d2", "metadata": {}, "outputs": [], "source": [ "tags_information(\"2\", 20)" ] }, { "cell_type": "code", "execution_count": null, "id": "7c2dc3e6-1418-44db-a8c0-4a9d59ec5232", "metadata": {}, "outputs": [], "source": [ "load_dataset_2(\"2\", \"tags\")[['id', 'name']]" ] }, { "cell_type": "code", "execution_count": null, "id": "c7b2c670-7122-4f67-b1aa-8c80a10f16d8", "metadata": {}, "outputs": [], "source": [ "tags_information(\"3\", 20)" ] }, { "cell_type": "code", "execution_count": null, "id": "76639995-252d-4a58-83d8-c0c00900c3a9", "metadata": {}, "outputs": [], "source": [ "tags_information(\"4\", 20)" ] }, { "cell_type": "code", "execution_count": null, "id": "07e91791-d4d4-42b1-ac18-22d3b0b9f7bd", "metadata": {}, "outputs": [], "source": [ "tags_information(\"101\", 20)" ] }, { "cell_type": "markdown", "id": "87d131cd-ead0-4ef4-a8ee-b09022d08ffa", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "## KPI product" ] }, { "cell_type": "code", "execution_count": null, "id": "26582be9-cfd1-48ea-a0a7-31101fdeb9d1", "metadata": {}, "outputs": [], "source": [ "tenant_id = \"1\"\n", "\n", "df_product = display_databases(tenant_id, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n", "\n", "df_product.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "533bf499-dd56-4d29-b261-ca1e4928c9c7", "metadata": {}, "outputs": [], "source": [ "nb_tickets_per_events = df_product.groupby(['name_event_types', 'name_events'])['ticket_id'].count().reset_index().sort_values('ticket_id', ascending = False)\n", "nb_tickets_per_events['prop_tickets'] = round(nb_tickets_per_events['ticket_id']/len(df_product), 3)\n", "nb_tickets_per_events" ] }, { "cell_type": "markdown", "id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "# Fin travail 25/02" ] }, { "cell_type": "markdown", "id": "c437eaec", "metadata": {}, "source": [ "# Exemple sur Company 1" ] }, { "cell_type": "markdown", "id": "e855f403", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "## customersplus.csv" ] }, { "cell_type": "code", "execution_count": null, "id": "91a8f8c4", "metadata": {}, "outputs": [], "source": [ "a = pd.DataFrame(df1_customersplus.info())" ] }, { "cell_type": "code", "execution_count": null, "id": "2fda171d", "metadata": {}, "outputs": [], "source": [ "def info_colonnes_dataframe(df):\n", " # Créer une liste pour stocker les informations sur chaque colonne\n", " infos_colonnes = []\n", "\n", " # Parcourir les colonnes du DataFrame\n", " for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n", " # Calculer le taux de valeurs manquantes\n", " taux_na = serie.isna().mean() * 100\n", "\n", " # Ajouter les informations à la liste\n", " infos_colonnes.append({\n", " 'Nom_colonne': nom_colonne,\n", " 'Type_colonne': str(serie.dtype),\n", " 'Taux_NA': taux_na\n", " })\n", "\n", " # Créer une nouvelle DataFrame à partir de la liste d'informations\n", " df_infos_colonnes = pd.DataFrame(infos_colonnes)\n", "\n", " return df_infos_colonnes" ] }, { "cell_type": "code", "execution_count": null, "id": "205eeeab", "metadata": {}, "outputs": [], "source": [ "def cleaning_date(df, column_name):\n", " \"\"\"\n", " Nettoie la colonne spécifiée du DataFrame en convertissant les valeurs en datetime avec le format ISO8601.\n", "\n", " Parameters:\n", " - df: DataFrame\n", " Le DataFrame contenant la colonne à nettoyer.\n", " - column_name: str\n", " Le nom de la colonne à nettoyer.\n", "\n", " Returns:\n", " - DataFrame\n", " Le DataFrame modifié avec la colonne nettoyée.\n", " \"\"\"\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", " return df" ] }, { "cell_type": "code", "execution_count": null, "id": "634282c5", "metadata": {}, "outputs": [], "source": [ "a = info_colonnes_dataframe(df1_customersplus)" ] }, { "cell_type": "code", "execution_count": null, "id": "0e8d4133", "metadata": {}, "outputs": [], "source": [ "a" ] }, { "cell_type": "code", "execution_count": null, "id": "1268ad5a", "metadata": {}, "outputs": [], "source": [ "a = pd.DataFrame(df1_customersplus.isna().sum()/len(df1_customersplus)*100)" ] }, { "cell_type": "code", "execution_count": null, "id": "bd41dc80", "metadata": {}, "outputs": [], "source": [ "# Selection des variables\n", "df1_customersplus_clean = df1_customersplus.copy()\n", "\n", "cleaning_date(df1_customersplus_clean, 'first_buying_date')\n", "cleaning_date(df1_customersplus_clean, 'last_visiting_date')\n", "\n", "df1_customersplus_clean.drop(['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'], axis = 1, inplace=True)\n", "df1_customersplus_clean.rename(columns = {'id' : 'customer_id'}, inplace = True)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "2455d2e1", "metadata": { "scrolled": true }, "outputs": [], "source": [ "df1_purchases" ] }, { "cell_type": "code", "execution_count": null, "id": "5f9a159d", "metadata": {}, "outputs": [], "source": [ "df1_purchases.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "db201bf7", "metadata": {}, "outputs": [], "source": [ "# Nettoyage purchase_date\n", "df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], utc = True)\n", "df1_purchases['purchase_date'] = pd.to_datetime(df1_purchases['purchase_date'], format = 'ISO8601')" ] }, { "cell_type": "code", "execution_count": null, "id": "bd436fca", "metadata": {}, "outputs": [], "source": [ "df1_purchases.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "83435862", "metadata": {}, "outputs": [], "source": [ "# Selection des variables\n", "df1_purchases_clean = df1_purchases[['id', 'purchase_date', 'customer_id']]" ] }, { "cell_type": "markdown", "id": "637bdb72", "metadata": {}, "source": [ "# Customer information" ] }, { "cell_type": "markdown", "id": "14c52894", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "## Target area - NLP" ] }, { "cell_type": "code", "execution_count": null, "id": "d83abfbf", "metadata": {}, "outputs": [], "source": [ "# Target.csv cleaning\n", "df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]]\n", "df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n", "\n", "# target_type cleaning\n", "df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n", "\n", "#customer_target_mappings cleaning\n", "df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n", "\n", "# Merge target et target_type\n", "df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, left_on='target_type_id', right_on='target_type_id', how='inner')\n", "df1_targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n", "\n", "# Merge\n", "df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, left_on='target_id', right_on='target_id', how='inner')\n", "df1_targets_full.drop(['target_id'], axis = 1, inplace=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "90d71b2c", "metadata": {}, "outputs": [], "source": [ "df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n", "len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n", "\n", "# 99,6% des 151 000 client visés sont catégorisés plusieurs fois et en moyenne 5 fois... \n", "df1_targets_test.mean()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "2301de1e", "metadata": {}, "outputs": [], "source": [ "df1_targets_full.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "75fbc2f7", "metadata": {}, "outputs": [], "source": [ "# Catégorisation des target_name\n", "import pandas as pd\n", "import nltk\n", "from nltk.tokenize import word_tokenize\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer\n", "from nltk.probability import FreqDist\n", "\n", "# Téléchargement des ressources nécessaires\n", "nltk.download('punkt')\n", "nltk.download('stopwords')\n", "nltk.download('wordnet')\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "55cddf92", "metadata": {}, "outputs": [], "source": [ "# Définition des fonctions de tokenisation, suppression des mots vides et lemmatisation\n", "def preprocess_text(texte):\n", " # Concaténation des éléments de la liste en une seule chaîne de caractères\n", " texte_concat = ' '.join(texte)\n", " \n", " # Tokenisation des mots\n", " tokens = word_tokenize(texte_concat.lower())\n", " \n", " # Suppression des mots vides (stopwords)\n", " stop_words = set(stopwords.words('french'))\n", " filtered_tokens = [word for word in tokens if word not in stop_words]\n", " \n", " # Lemmatisation des mots\n", " lemmatizer = WordNetLemmatizer()\n", " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n", " \n", " return lemmatized_tokens\n", "\n", "\n", "# Appliquer le prétraitement à la colonne de texte\n", "df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n", "\n", "# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n", "all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n", "\n", "# Calculer la fréquence des mots\n", "freq_dist = FreqDist(all_words)\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "7fd98a85", "metadata": {}, "outputs": [], "source": [ "# Affichage des mots les plus fréquents\n", "print(\"Mots les plus fréquents:\")\n", "for mot, freq in freq_dist.most_common(15):\n", " print(f\"{mot}: {freq}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "cf94bb1d", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import nltk\n", "from nltk.tokenize import word_tokenize\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer\n", "\n", "# Téléchargement des ressources nécessaires\n", "nltk.download('punkt')\n", "nltk.download('stopwords')\n", "nltk.download('wordnet')\n", "\n", "# Création de la DataFrame d'exemple\n", "data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n", "df = pd.DataFrame(data)\n", "\n", "# Fonction pour prétraiter le texte\n", "def preprocess_text(texte):\n", " # Concaténation des éléments de la liste en une seule chaîne de caractères\n", " texte_concat = ' '.join(texte)\n", " \n", " # Tokenisation des mots\n", " tokens = word_tokenize(texte_concat.lower())\n", " \n", " # Suppression des mots vides (stopwords)\n", " stop_words = set(stopwords.words('french'))\n", " filtered_tokens = [word for word in tokens if word not in stop_words]\n", " \n", " # Lemmatisation des mots\n", " lemmatizer = WordNetLemmatizer()\n", " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n", " \n", " return lemmatized_tokens\n", "\n", "# Appliquer la fonction de prétraitement à la colonne de texte\n", "df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n", "\n", "# Afficher le résultat\n", "print(df)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }