BDC-team-1/1_Descriptive_Statistics_Museum.ipynb

3185 lines
571 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"id": "3f41343f-7205-41d9-89dd-88039e301413",
"metadata": {},
"source": [
"# Statistiques descriptives"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "abfaf341-7b35-4407-9133-d21336c04027",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"from datetime import datetime, date, timedelta\n",
"from dateutil.relativedelta import relativedelta\n",
"import warnings"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7fb72fa3-7940-496f-ac78-c2837f65eefa",
"metadata": {},
"outputs": [],
"source": [
"# S3 filesystem client pointed at the endpoint named by the AWS_S3_ENDPOINT environment variable\n",
"S3_ENDPOINT_URL = \"https://{}\".format(os.environ[\"AWS_S3_ENDPOINT\"])\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c34e13f4-e043-43d6-ba8c-2e13d008647c",
"metadata": {},
"outputs": [],
"source": [
"# Import cleaning and merge functions: run 0_KPI_functions.py in this notebook's namespace.\n",
"# The file is read inside a with-block so its handle is closed right after reading.\n",
"with open('0_KPI_functions.py') as kpi_source:\n",
"    exec(kpi_source.read())\n",
"\n",
"# Useful functions :\n",
"# display_databases(directory_path, file_name = ['customerplus_cleaned', 'target_information', 'campaigns_information', 'products_purchased_reduced'], datetime_col = None)\n",
"# campaigns_kpi_function(campaigns_information = None)\n",
"# tickets_kpi_function(tickets_information = None)\n",
"# customerplus_kpi_function(customerplus_clean = None)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "c60505f4-b95b-4c61-b842-26b27af7e280",
"metadata": {},
"outputs": [],
"source": [
"# Never truncate the column list when a DataFrame is displayed\n",
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "aaffd291-2c88-44c8-a951-0ef1f8369ba3",
"metadata": {},
"outputs": [],
"source": [
"# Additional function to load an initial dataset (a CSV file stored under bdc2324-data)\n",
"def load_dataset_2(directory_path, file_name):\n",
"    \"\"\"\n",
"    Read one CSV table from the bdc2324-data bucket into a DataFrame.\n",
"\n",
"    The object key is built as\n",
"        bdc2324-data/<directory_path>/<directory_path><file_name>.csv\n",
"    and opened through the notebook-level s3fs filesystem `fs`.\n",
"    An 'identifier' column, when present, is removed before returning.\n",
"    \"\"\"\n",
"    csv_key = \"/\".join([\"bdc2324-data\", directory_path, directory_path + file_name + \".csv\"])\n",
"    with fs.open(csv_key, mode=\"rb\") as handle:\n",
"        table = pd.read_csv(handle, sep=\",\")\n",
"\n",
"    # NOTE: dropping columns that contain missing values was tried here and is disabled.\n",
"    # Drop the 'identifier' column when the table has one.\n",
"    if 'identifier' not in table.columns:\n",
"        return table\n",
"    return table.drop(columns = 'identifier')"
]
},
{
"cell_type": "markdown",
"id": "45d5261f-4d46-49cb-8582-dd2121122b05",
"metadata": {},
"source": [
"# 1 - Comportement d'achat"
]
},
{
"cell_type": "markdown",
"id": "3479960c-0d23-45f1-8fff-d87395205731",
"metadata": {},
"source": [
"## Outlier"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "9376af51-4320-44b6-8f30-1e1234371556",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
"    \"\"\"\n",
"    Draw a pie chart of how the summed total_amount splits between the biggest customers and the rest.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Directory / company identifier passed to display_databases. For '101' the extra file\n",
"        'products_purchased_reduced_1' is loaded as well and its KPIs are appended.\n",
"    coupure : int\n",
"        Number of top customers (by summed total_amount) drawn as their own slice;\n",
"        every remaining customer is pooled into a single 'Autre' slice.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. The figure is shown with plt.show().\n",
"    \"\"\"\n",
"    df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
"    df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
"\n",
"    # For directory '101' a second file is loaded and its KPIs are appended.\n",
"    if directory_path == \"101\" :\n",
"        df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
"        df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
"        df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
"\n",
"    # Summed total_amount per customer, largest first (customer_id is the index).\n",
"    amount_by_customer = (\n",
"        df_tickets_kpi.groupby('customer_id')['total_amount']\n",
"        .sum()\n",
"        .sort_values(ascending = False)\n",
"    )\n",
"\n",
"    # Keep the `coupure` biggest customers as individual slices and pool the others.\n",
"    # .iloc makes the slicing explicitly positional: the index holds customer ids, not positions.\n",
"    top = amount_by_customer.iloc[:coupure]\n",
"    rest_sum = amount_by_customer.iloc[coupure:].sum()\n",
"    new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"    # Pie chart: one slice per kept customer plus 'Autre'.\n",
"    plt.figure(figsize=(3, 3))\n",
"    plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"    plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle\n",
"    plt.title('Répartition des montants totaux')\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "73211efc-b79f-4235-a250-c0699ea277bf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEWCAYAAAAtl/EzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3MElEQVR4nO3dd3hTZfsH8G9WkzbdpXtSRpllgyCIRZFVEARUZBVQprhFRFkKIv4c+IIvLhRRUMSBIoJFlsree7RQWlZp6V5pM+7fH30bSQddSc85yf25rl6Q5OSc+5ycfHPm88iIiMAYYwKTC10AY4wBHEaMMZHgMGKMiQKHEWNMFDiMGGOiwGHEGBMFDiPGmChwGDHGRIHDiDEmCg0WRidPnoSLiwuWL1/eUJNkjElIrcJo9erVkMlk5j+lUonAwEA8/vjjSEhIqPJ9eXl5GDFiBGbOnImZM2fWu+j6+P3337FgwYJKX4uIiEBcXJz58Y0bN7BgwQIcP368wrALFiyATCazTZF1JJPJqpw3R3H27FksWLAAV65caZDpvfXWW9i4caPVxmeN+u+2josa1cKXX35JAOjLL7+kffv20c6dO2nRokXk7OxMfn5+lJmZWen7Ro4cSaNHjyaTyVSbydnEjBkzqKrZPnr0KCUmJpofHzp0yDy/5V29epX27dtnqzLrBADNnz9f6DIEtWHDBgJAO3fubJDpabVaGj9+vNXGZ43677aOi5myLgHWpk0bdO7cGQBw//33w2g0Yv78+di4cSMmTJhQYfjvv/++jlFpPYWFhXBxcbnrMB06dKjx+EJCQhASElLfshhjZWqTXGVbRocOHbJ4fvPmzQSAlixZYvH8oUOHaPDgweTl5UVqtZrat29P69evr3Sc8fHxFBcXR15eXuTi4kKxsbF06dIli2Hj4+NpyJAhFBwcTGq1mpo0aUKTJ0+m9PR0i+Hmz59PAOjIkSM0fPhw8vT0pICAABo/fjwBqPCXlJRERETh4eHmX7mdO3dWOmzZlkfZNO5kNBpp6dKlFBUVRU5OTuTr60tjx46lq1evWgzXu3dvat26NR08eJB69uxJzs7O1LhxY1qyZAkZjcZqP4ecnBx68sknydvbm7RaLfXr148uXLhQ6ZbRxYsXadSoUeTr60tOTk7UokULWrFiRYW633zzTWrevDlpNBry8PCgtm3b0rJly+5aR9kyWrt2Lc2aNYsCAgJIq9VSbGwspaamUm5uLj311FPk4+NDPj4+FBcXR3l5eRbjKCoqotmzZ1NERASpVCoKCgqi6dOnU1ZWlsVw4eHhNGjQINqyZQt16NCBNBoNRUVF0apVq8zDlK1L5f/Ktmxru/6cPn2aHn/8cXJ3dyc/Pz+aMGECZWdnm4erbFq9e/cmIqKCggJ68cUXKSIigtRqNXl5eVGnTp1o3bp1VS7P6uonIlq1ahVFR0ebxzl06FA6e/as+fXq1vEVK1ZQr169yNfXl1xcXKhNmza0dOlSKikpqbC8K9vi6927t3keiYimTJlCarWaDh8+bH7OaDRSnz59yM/Pj27cuFHl/JZnlTBasWIFAaAff/zR/NyOHTvIycmJevXqRevXr6etW7dSXFxchYVbNs7Q0FCaOHEibdmyhT799FPy8/Oj0NBQi5Vy5cqVtGTJEvr1119p9+7d9NVXX1G7du0oKirKYmGWrUzh4eH0yiuv0LZt22jjxo2UmJhII0aMIAC0b98+859OpyMiyw8gJyfHXNvrr79uHrYsWCoLo8mTJxMAevrpp2nr1q308ccfk6+vL4WGhlqs8L179yYfHx9q1qwZffzxx7Rt2zaaPn06AaCvvvrqrp+ByWSimJgYUqvVtHjxYoqPj6f58+dTZGRkhTA6c+aMOVjWrFlD8fHx9OKLL5JcLqcFCxaYh1uyZAkpFAqaP38+bd++nbZu3UrLli2zGKYyZWEUHh5OcXFx5nl2dXWlmJgY6tu3L7300ksUHx9PS5
cuJYVCQTNnzrSYl379+pFSqaS5c+dSfHw8vfvuu6TVaqlDhw7mz6XsswkJCaFWrVrRmjVr6I8//qCRI0cSANq9ezcREaWlpdFbb71FAOijjz4yf2ZpaWl1Wn+ioqJo3rx5tG3bNnr//fdJrVbThAkTzMPt27ePnJ2daeDAgeZpnTlzhohKv6QuLi70/vvv086dO+m3336jt99+m5YvX17l8qyu/rLXRo0aRZs3b6Y1a9ZQZGQkeXh40MWLF4mIql3Hn3/+eVq5ciVt3bqVduzYQR988AE1atTIYr7KlndNwqioqIjat29PkZGR5u/qvHnzSC6XU3x8fJXzWpk6hdH+/ftJr9dTXl4ebd26lQICAui+++4jvV5vHrZFixbUoUMHi+eIiGJjYykwMNC8BVA2zmHDhlkMt2fPHgJAixYtqrQWk8lEer2ekpOTCQD98ssv5tfKVqZ58+ZVeN/d9qfLfwB3O2ZUPozOnTtHAGj69OkWwx04cIAA0Jw5c8zP9e7dmwDQgQMHLIZt1aoV9evXr9LaymzZsoUA0Icffmjx/OLFiyuEUb9+/SgkJIRycnIshn366adJo9GYj/HFxsZS+/bt7zrdypSF0eDBgy2ef+655wgAPfPMMxbPDx06lLy9vc2Pt27dSgDonXfesRhu/fr1BIA+/fRT83Ph4eGk0WgoOTnZ/FxRURF5e3vTlClTzM/V9JhLTdaf8nVNnz6dNBqNxbHPqo4ZtWnThoYOHXrXGipTVf1ZWVnm4LtTSkoKqdVqeuKJJ8zP1fSYkdFoJL1eT2vWrCGFQmFxzLemYURElJCQQO7u7jR06FD6888/SS6X0+uvv179zJZTp1P799xzD1QqFdzc3NC/f394eXnhl19+gVJZeggqMTER58+fx+jRowEABoPB/Ddw4EDcvHkTFy5csBhn2bBlevTogfDwcOzcudP8XFpaGqZOnYrQ0FAolUqoVCqEh4cDAM6dO1ehzuHDh9dl9uqkrM47z8YBQNeuXdGyZUts377d4vmAgAB07drV4rno6GgkJyfXaDrll9cTTzxh8Vin02H79u0YNmwYXFxcKnwGOp0O+/fvN9d44sQJTJ8+HX/88Qdyc3NrNtP/Exsba/G4ZcuWAIBBgwZVeD4zMxP5+fkAgB07dgCouMxGjhwJrVZbYZm1b98eYWFh5scajQbNmzevdpmVqe36M2TIEIvH0dHR0Ol0SEtLq3ZaXbt2xZYtWzB79mzs2rULRUVFNaqxKvv27UNRUVGFZRUaGoo+ffpUWFZVOXbsGIYMGQIfHx8oFAqoVCqMGzcORqMRFy9erFNtTZs2xWeffYaNGzciNjYWvXr1qtPZvDqF0Zo1a3Do0CHs2LEDU6ZMwblz5zBq1Cjz67du3QIAvPTSS1CpVBZ/06dPBwDcvn3bYpwBAQEVphMQEICMjAwAgMlkwkMPPYSffvoJs2bNwvbt23Hw4EHzF6qyDzswMLAus1cnZXVWNs2goCDz62V8fHwqDKdWq6tdaTMyMqBUKiu8v/zyy8jIgMFgwPLlyyt8BgMHDgTw72fw6quv4t1338X+/fsxYMAA+Pj44IEHHsDhw4ermetS3t7eFo+dnJzu+rxOp7OYF19fX4vhZDKZxWdfpq7LDKjb+lN+emq1usphy/vPf/6DV155BRs3bkRMTAy8vb0xdOjQu14Ccze1Xb8qk5KSgl69euH69ev48MMP8ffff+PQoUP46KOPANRsvqoyaNAg+Pv7Q6fT4YUXXoBCoaj1OOp0Nq1ly5bms2kxMTEwGo34/PPP8cMPP2DEiBFo1KgRgNKV/JFHHql0HFFRURaPU1NTKwyTmpqKpk2bAgBOnz6NEydOYPXq1Rg/frx5mMTExCrrbMjrgMpW3Js3b1Y4y3bjxg3zMrHGdAwGAzIyMiy+LOWXn5eXFxQKBcaOHYsZM2ZUOq7GjRsDAJRKJV544QW88MILyM7Oxp9//ok5c+agX79+uHr1arVnIes7L+
np6RaBRERITU1Fly5drDatuqw/9aHVarFw4UIsXLgQt27dMm8lDR48GOfPn6/1+O5cv8qr6fq1ceNGFBQU4KeffjJ
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"1\", coupure = 1)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "5c8e9bb7-a403-4898-b40b-47aa37237bc6",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>58201</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2020-09-03 13:11:25.569167+02:00</td>\n",
" <td>2023-03-04 13:27:42.761679+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-08 03:20:07</td>\n",
" <td>45.0</td>\n",
" <td>1254775</td>\n",
" <td>7.030122</td>\n",
" <td>330831</td>\n",
" <td>-67.790969</td>\n",
" <td>13.75153</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>641472</td>\n",
" <td>2013-06-10 12:37:58+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"58201 1 NaN NaN NaN NaN 2 \n",
"\n",
" created_at updated_at \\\n",
"58201 2020-09-03 13:11:25.569167+02:00 2023-03-04 13:27:42.761679+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"58201 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"58201 True NaN False NaN NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
"58201 NaN NaN False 2023-11-08 03:20:07 45.0 \n",
"\n",
" ticket_sum average_price fidelity average_purchase_delay \\\n",
"58201 1254775 7.030122 330831 -67.790969 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"58201 13.75153 1.956087 8821221.5 \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"58201 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
"58201 641472 2013-06-10 12:37:58+02:00 NaN NaN \n",
"\n",
" country age tenant_id \n",
"58201 fr NaN 1311 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = load_dataset_2('1', 'customersplus')\n",
"df[df['id'] == 1]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "4455b6b9-8395-47ea-b976-d98a2d3c782c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAESCAYAAABq/8cSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEYklEQVR4nO3dd3gU5doG8Ht7y6Zseg+hhECAhCAICAGkd6U3KYoFhYOoNJWmHJSjHPzwSFEEUfRYDyhNIiAdCaRAaAkQkpAC6aRtsuX9/ohZsilkEzaZze7zu65csDPvzNw7u3ky5Z0ZHmOMgRBCCACAz3UAQgixJFQUCSGkCiqKhBBSBRVFQgipgooiIYRUQUWREEKqoKJICCFVUFEkhJAqqCgSQkgVLb4oXrp0CXK5HJs2beI6CiHEClhEUdy5cyd4PJ7hRygUwtPTE5MnT0ZiYmKd0xUWFmL8+PGYP38+5s+f34yJazpw4ABWrVpV67iAgADMmjXL8Do9PR2rVq1CbGxsjbarVq0Cj8drmpCNxOPx6nxvtuLq1atYtWoV7ty50yzL++c//4k9e/aYbX7myP+o77hVYRZgx44dDADbsWMHO3v2LDt27Bh7//33mUwmY25ubiw3N7fW6SZMmMCmTZvG9Hp9Myeu6dVXX2V1rc7o6Gh28+ZNw+uoqCjD+60uNTWVnT17tqliNgoAtnLlSq5jcOrHH39kANixY8eaZXkKhYLNnDnTbPMzR/5HfcetiZDDelxDSEgIunXrBgDo168fdDodVq5ciT179mD27Nk12v/www/NHbGGkpISyOXyR7YJCwszeX4+Pj7w8fF53FiEkMbiuioz9nBLMSoqymj4/v37GQC2bt06o+FRUVFs1KhRzMnJiUkkEhYaGsq+//77Wud5+PBhNmvWLObk5MTkcjkbOXIku3XrllHbw4cPs9GjRzNvb28mkUhY69at2YsvvsiysrKM2q1cuZIBYBcvXmTjxo1jjo6OzMPDg82cOZMBqPGTlJTEGGPM39/f8Ff/2LFjtbat3BKrXEZVOp2OffjhhywoKIiJxWLm6urKZsyYwVJTU43aRUREsI4dO7Lz58+zp556islkMtaqVSu2bt06ptPp6v0cCgoK2AsvvMBUKhVTKBRsyJAh7MaNG7VuKSYkJLApU6YwV1dXJhaLWfv27dmnn35aI/d7773H2rVrx6RSKXNwcGCdOnViGzdufGSOynW0e/dutnjxYubh4cEUCgUbOXIky8zMZA8ePGBz585lzs7OzNnZmc2aNYsVFhYazaO0tJQtXbqUBQQEMJFIxLy8vNi8efNYXl6eUTt/f382YsQIdvDgQRYWFsakUikLCgpi27dvN7Sp/C5V/6nc0m/o9yc+Pp5NnjyZ2dvbMzc3NzZ79myWn59vaFfbsiIiIhhjjBUXF7M33niDBQQEMIlEwpycnFh4eDj79ttv61yf9eVnjLHt27ezzp07G+Y5duxYdvXqVcP4+r7jn376KevTpw9zdXVlcrmchYSEsA8//JCVl5fXWN+1bQFHREQY3iNjjL300ktMIpGwCxcuGIbpdDo2YMAA5ubmxtLT0+t8v4/Loovip59+ygCwn3/+2TDs6NGjTCwWsz59+rDvv/+eHTp0iM2aNavGh1w5T19fXzZnzhx28OBBtm3bNubm5sZ8fX2Nfjk2b97M1q1bx3799Vd2/Phx9tVXX7EuXbqwoKAgow+18kvt7+/PlixZwiIjI9mePXvYzZs32fjx4xkAdvbsWcOPWq1mjBl/EQoKCgzZ3nnnHUPbygJXW1F88cUXGQD22muvsUOHDrEtW7YwV1dX5uvra/SLFxERwZydnVnbtm3Zli1bWGRkJJs3bx4DwL766qtHfgZ6vZ7179+fSSQStnbtWnb48GG2cuVKFhgYWKMoXrlyxVDgdu3axQ4fPszeeOMNxufz2apVqwzt1q1bxwQCAVu5ciU7cuQIO3ToENu4caNRm9
pUFkV/f382a9Ysw3u2s7Nj/fv3Z4MGDWJvvvkmO3z4MPvwww+ZQCBg8+fPN3ovQ4YMYUKhkL377rvs8OHD7KOPPmIKhYKFhYUZPpfKz8bHx4d16NCB7dq1i/3+++9swoQJDAA7fvw4Y4yx+/fvs3/+858MAPvPf/5j+Mzu37/fqO9PUFAQW7FiBYuMjGQbNmxgEomEzZ4929Du7NmzTCaTseHDhxuWdeXKFcZYRbGQy+Vsw4YN7NixY2zfvn3sgw8+YJs2bapzfdaXv3LclClT2P79+9muXbtYYGAgc3BwYAkJCYwxVu93/PXXX2ebN29mhw4dYkePHmX//ve/mYuLi9H7qlzfphTF0tJSFhoaygIDAw2/qytWrGB8Pp8dPny4zvdqDhZVFM+dO8c0Gg0rLCxkhw4dYh4eHqxv375Mo9EY2rZv356FhYUZDWOMsZEjRzJPT0/DFlHlPJ955hmjdqdPn2YA2Pvvv19rFr1ezzQaDUtOTmYA2N69ew3jKr/UK1asqDHdo463VP8iPOqYYvWieO3aNQaAzZs3z6jdX3/9xQCw5cuXG4ZFREQwAOyvv/4yatuhQwc2ZMiQWrNVOnjwIAPAPvnkE6Pha9eurVEUhwwZwnx8fFhBQYFR29dee41JpVLDMeCRI0ey0NDQRy63NpVFcdSoUUbDFy5cyACwBQsWGA0fO3YsU6lUhteHDh1iANj69euN2n3//fcMANu2bZthmL+/P5NKpSw5OdkwrLS0lKlUKvbSSy8Zhpl6TM6U70/1XPPmzWNSqdTo2HhdxxRDQkLY2LFjH5mhNnXlz8vLMxTgqlJSUphEImFTp041DDP1mKJOp2MajYbt2rWLCQQCo3MCphZFxhhLTExk9vb2bOzYseyPP/5gfD6fvfPOO/W/2cdkEWefKz355JMQiURQKpUYOnQonJycsHfvXgiFFYc+b968ievXr2PatGkAAK1Wa/gZPnw4MjIycOPGDaN5Vrat1KtXL/j7++PYsWOGYffv38fLL78MX19fCIVCiEQi+Pv7AwCuXbtWI+e4cePM+r4fpTJn1bPXANC9e3cEBwfjyJEjRsM9PDzQvXt3o2GdO3dGcnKyScupvr6mTp1q9FqtVuPIkSN45plnIJfLa3wGarUa586dM2SMi4vDvHnz8Pvvv+PBgwemvem/jRw50uh1cHAwAGDEiBE1hufm5qKoqAgAcPToUQA119mECROgUChqrLPQ0FD4+fkZXkulUrRr167edVapod+f0aNHG73u3Lkz1Go17t+/X++yunfvjoMHD2Lp0qX4888/UVpaalLGupw9exalpaU11pWvry8GDBhQY13VJSYmBqNHj4azszMEAgFEIhGee+456HQ6JCQkNCpbmzZt8Pnnn2PPnj0YOXIk+vTp0yxnvy2qKO7atQtRUVE4evQoXnrpJVy7dg1TpkwxjL937x4A4M0334RIJDL6mTdvHgAgOzvbaJ4eHh41luPh4YGcnBwAgF6vx+DBg/HLL79g8eLFOHLkCM6fP2/4xa7tS+fp6WmeN2yCypy1LdPLy8swvpKzs3ONdhKJpN5fnpycHAiFwhrTV19/OTk50Gq12LRpU43PYPjw4QAefgbLli3DRx99hHPnzmHYsGFwdnbG008/jQsXLtTzriuoVCqj12Kx+JHD1Wq10XtxdXU1asfj8Yw++0qNXWdA474/1ZcnkUjqbFvd//3f/2HJkiXYs2cP+vfvD5VKhbFjxz6y69qjNPT7VZuUlBT06dMHaWlp+OSTT3Dy5ElERUXhP//5DwDT3lddRowYAXd3d6jVaixatAgCgaDR8zKVRZ19Dg4ONpx97t+/P3Q6Hb744gv89NNPGD9+PFxcXABU/LI9++yztc4jKCjI6HVmZmaNNpmZmWjTpg0AID4+HnFxcdi5cydmzpxpaHPz5s06czZnP8LKX6CMjIwaZ6XT09MN68Qcy9FqtcjJyTH6pa2+/pycnCAQCDBjxgy8+uqrtc
6rVatWAAChUIhFixZh0aJFyM/Pxx9//IHly5djyJAhSE1Nrfes/eO+l6ysLKPCyBhDZmYmnnjiCbMtqzHfn8ehUCi
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"2\", coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "ee16cf31-18e1-4803-b003-ba1d1a3fc333",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>170246</th>\n",
" <td>12184</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3564</td>\n",
" <td>2023-10-12 12:25:15.438714+02:00</td>\n",
" <td>2023-11-09 05:14:01.944407+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>1275.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-08 19:17:50.565000</td>\n",
" <td>75.0</td>\n",
" <td>512831</td>\n",
" <td>12.645438</td>\n",
" <td>197358</td>\n",
" <td>0.0</td>\n",
" <td>31.719577</td>\n",
" <td>2.508381</td>\n",
" <td>6484972.4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>204447</td>\n",
" <td>2020-08-28 08:55:55.710000+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"170246 12184 NaN NaN NaN NaN 3564 \n",
"\n",
" created_at updated_at \\\n",
"170246 2023-10-12 12:25:15.438714+02:00 2023-11-09 05:14:01.944407+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"170246 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"170246 True NaN False 1275.0 NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date \\\n",
"170246 NaN NaN False 2023-11-08 19:17:50.565000 \n",
"\n",
" max_price ticket_sum average_price fidelity \\\n",
"170246 75.0 512831 12.645438 197358 \n",
"\n",
" average_purchase_delay average_price_basket average_ticket_basket \\\n",
"170246 0.0 31.719577 2.508381 \n",
"\n",
" total_price preferred_category preferred_supplier \\\n",
"170246 6484972.4 NaN NaN \n",
"\n",
" preferred_formula purchase_count first_buying_date \\\n",
"170246 NaN 204447 2020-08-28 08:55:55.710000+02:00 \n",
"\n",
" last_visiting_date zipcode country age tenant_id \n",
"170246 NaN NaN NaN NaN 1879 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = load_dataset_2('2', 'customersplus')\n",
"df[df['id'] == 12184]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "4073c986-3e2c-4945-8601-220fea747c9c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>102639</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>email1</td>\n",
" <td>1</td>\n",
" <td>2023-07-20 17:16:27.062822+02:00</td>\n",
" <td>2023-07-20 17:16:27.074952+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224453</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>firstname2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2023-07-21 10:18:44.502496+02:00</td>\n",
" <td>2023-07-21 10:18:44.502496+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>josef</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>103013</th>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>firstname3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2023-07-21 10:18:44.503913+02:00</td>\n",
" <td>2023-07-21 10:18:44.503913+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>dominic</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138386</th>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>firstname4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2023-07-21 10:18:44.504404+02:00</td>\n",
" <td>2023-07-21 10:18:44.504404+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>abigail</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190087</th>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>firstname5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2023-07-21 10:18:44.504841+02:00</td>\n",
" <td>2023-07-21 10:18:44.504841+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>sophia</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101868</th>\n",
" <td>601387</td>\n",
" <td>lastname601387</td>\n",
" <td>firstname601387</td>\n",
" <td>NaN</td>\n",
" <td>email601387</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.358715+01:00</td>\n",
" <td>2023-11-09 05:13:57.358715+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>de</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>205168</th>\n",
" <td>601388</td>\n",
" <td>lastname601388</td>\n",
" <td>firstname601388</td>\n",
" <td>NaN</td>\n",
" <td>email601388</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.359234+01:00</td>\n",
" <td>2023-11-09 05:13:57.359234+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>de</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-09 00:25:24.716000</td>\n",
" <td>15.0</td>\n",
" <td>2</td>\n",
" <td>14.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>28.0</td>\n",
" <td>2.0</td>\n",
" <td>28.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2023-11-09 00:25:24.716000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67641</th>\n",
" <td>601389</td>\n",
" <td>lastname601389</td>\n",
" <td>firstname601389</td>\n",
" <td>NaN</td>\n",
" <td>email601389</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.360373+01:00</td>\n",
" <td>2023-11-09 05:13:57.360373+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>de</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-09 00:28:07.511000</td>\n",
" <td>15.0</td>\n",
" <td>2</td>\n",
" <td>15.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>30.0</td>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2023-11-09 00:28:07.511000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67639</th>\n",
" <td>601390</td>\n",
" <td>lastname601390</td>\n",
" <td>firstname601390</td>\n",
" <td>NaN</td>\n",
" <td>email601390</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.360903+01:00</td>\n",
" <td>2023-11-09 05:13:57.360903+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>256450</th>\n",
" <td>601391</td>\n",
" <td>lastname601391</td>\n",
" <td>firstname601391</td>\n",
" <td>NaN</td>\n",
" <td>email601391</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.361432+01:00</td>\n",
" <td>2023-11-09 05:14:18.906054+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-09 00:36:41.172000</td>\n",
" <td>15.0</td>\n",
" <td>2</td>\n",
" <td>15.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>30.0</td>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2023-11-09 00:36:41.172000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>275622 rows × 42 columns</p>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email \\\n",
"102639 1 NaN NaN NaN email1 \n",
"224453 2 NaN firstname2 NaN NaN \n",
"103013 3 NaN firstname3 NaN NaN \n",
"138386 4 NaN firstname4 NaN NaN \n",
"190087 5 NaN firstname5 NaN NaN \n",
"... ... ... ... ... ... \n",
"101868 601387 lastname601387 firstname601387 NaN email601387 \n",
"205168 601388 lastname601388 firstname601388 NaN email601388 \n",
"67641 601389 lastname601389 firstname601389 NaN email601389 \n",
"67639 601390 lastname601390 firstname601390 NaN email601390 \n",
"256450 601391 lastname601391 firstname601391 NaN email601391 \n",
"\n",
" street_id created_at \\\n",
"102639 1 2023-07-20 17:16:27.062822+02:00 \n",
"224453 2 2023-07-21 10:18:44.502496+02:00 \n",
"103013 3 2023-07-21 10:18:44.503913+02:00 \n",
"138386 3 2023-07-21 10:18:44.504404+02:00 \n",
"190087 3 2023-07-21 10:18:44.504841+02:00 \n",
"... ... ... \n",
"101868 3550 2023-11-09 05:13:57.358715+01:00 \n",
"205168 3550 2023-11-09 05:13:57.359234+01:00 \n",
"67641 3550 2023-11-09 05:13:57.360373+01:00 \n",
"67639 3550 2023-11-09 05:13:57.360903+01:00 \n",
"256450 3550 2023-11-09 05:13:57.361432+01:00 \n",
"\n",
" updated_at civility is_partner extra \\\n",
"102639 2023-07-20 17:16:27.074952+02:00 NaN False NaN \n",
"224453 2023-07-21 10:18:44.502496+02:00 NaN False NaN \n",
"103013 2023-07-21 10:18:44.503913+02:00 NaN False NaN \n",
"138386 2023-07-21 10:18:44.504404+02:00 NaN False NaN \n",
"190087 2023-07-21 10:18:44.504841+02:00 NaN False NaN \n",
"... ... ... ... ... \n",
"101868 2023-11-09 05:13:57.358715+01:00 NaN False NaN \n",
"205168 2023-11-09 05:13:57.359234+01:00 NaN False NaN \n",
"67641 2023-11-09 05:13:57.360373+01:00 NaN False NaN \n",
"67639 2023-11-09 05:13:57.360903+01:00 NaN False NaN \n",
"256450 2023-11-09 05:14:18.906054+01:00 NaN False NaN \n",
"\n",
" deleted_at reference gender is_email_true extra_field opt_in \\\n",
"102639 NaN NaN 2 True NaN False \n",
"224453 NaN NaN 1 True NaN False \n",
"103013 NaN NaN 2 True NaN False \n",
"138386 NaN NaN 2 True NaN False \n",
"190087 NaN NaN 1 True NaN False \n",
"... ... ... ... ... ... ... \n",
"101868 NaN NaN 2 True NaN False \n",
"205168 NaN NaN 2 True NaN False \n",
"67641 NaN NaN 2 True NaN False \n",
"67639 NaN NaN 0 True NaN False \n",
"256450 NaN NaN 2 True NaN False \n",
"\n",
" structure_id note profession language mcp_contact_id need_reload \\\n",
"102639 NaN NaN NaN NaN 1.0 False \n",
"224453 NaN NaN NaN josef NaN False \n",
"103013 NaN NaN NaN dominic NaN False \n",
"138386 NaN NaN NaN abigail NaN False \n",
"190087 NaN NaN NaN sophia NaN False \n",
"... ... ... ... ... ... ... \n",
"101868 NaN NaN NaN de NaN False \n",
"205168 NaN NaN NaN de NaN False \n",
"67641 NaN NaN NaN de NaN False \n",
"67639 NaN NaN NaN NaN NaN False \n",
"256450 NaN NaN NaN NaN NaN False \n",
"\n",
" last_buying_date max_price ticket_sum average_price \\\n",
"102639 NaN NaN 0 NaN \n",
"224453 NaN NaN 0 NaN \n",
"103013 NaN NaN 0 NaN \n",
"138386 NaN NaN 0 NaN \n",
"190087 NaN NaN 0 NaN \n",
"... ... ... ... ... \n",
"101868 NaN NaN 0 NaN \n",
"205168 2023-11-09 00:25:24.716000 15.0 2 14.0 \n",
"67641 2023-11-09 00:28:07.511000 15.0 2 15.0 \n",
"67639 NaN NaN 0 NaN \n",
"256450 2023-11-09 00:36:41.172000 15.0 2 15.0 \n",
"\n",
" fidelity average_purchase_delay average_price_basket \\\n",
"102639 0 NaN NaN \n",
"224453 0 NaN NaN \n",
"103013 0 NaN NaN \n",
"138386 0 NaN NaN \n",
"190087 0 NaN NaN \n",
"... ... ... ... \n",
"101868 0 NaN NaN \n",
"205168 1 0.0 28.0 \n",
"67641 1 0.0 30.0 \n",
"67639 0 NaN NaN \n",
"256450 1 0.0 30.0 \n",
"\n",
" average_ticket_basket total_price preferred_category \\\n",
"102639 NaN 0.0 NaN \n",
"224453 NaN 0.0 NaN \n",
"103013 NaN 0.0 NaN \n",
"138386 NaN 0.0 NaN \n",
"190087 NaN 0.0 NaN \n",
"... ... ... ... \n",
"101868 NaN 0.0 NaN \n",
"205168 2.0 28.0 NaN \n",
"67641 2.0 30.0 NaN \n",
"67639 NaN 0.0 NaN \n",
"256450 2.0 30.0 NaN \n",
"\n",
" preferred_supplier preferred_formula purchase_count \\\n",
"102639 NaN NaN 0 \n",
"224453 NaN NaN 0 \n",
"103013 NaN NaN 0 \n",
"138386 NaN NaN 0 \n",
"190087 NaN NaN 0 \n",
"... ... ... ... \n",
"101868 NaN NaN 0 \n",
"205168 NaN NaN 1 \n",
"67641 NaN NaN 1 \n",
"67639 NaN NaN 0 \n",
"256450 NaN NaN 1 \n",
"\n",
" first_buying_date last_visiting_date zipcode country \\\n",
"102639 NaN NaN NaN fr \n",
"224453 NaN NaN NaN ch \n",
"103013 NaN NaN NaN ch \n",
"138386 NaN NaN NaN ch \n",
"190087 NaN NaN NaN ch \n",
"... ... ... ... ... \n",
"101868 NaN NaN NaN NaN \n",
"205168 2023-11-09 00:25:24.716000+01:00 NaN NaN NaN \n",
"67641 2023-11-09 00:28:07.511000+01:00 NaN NaN NaN \n",
"67639 NaN NaN NaN NaN \n",
"256450 2023-11-09 00:36:41.172000+01:00 NaN NaN NaN \n",
"\n",
" age tenant_id \n",
"102639 NaN 1879 \n",
"224453 NaN 1879 \n",
"103013 NaN 1879 \n",
"138386 NaN 1879 \n",
"190087 NaN 1879 \n",
"... ... ... \n",
"101868 NaN 1879 \n",
"205168 NaN 1879 \n",
"67641 NaN 1879 \n",
"67639 NaN 1879 \n",
"256450 NaN 1879 \n",
"\n",
"[275622 rows x 42 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(by = 'id')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "475030ad-6a69-4c91-9cd6-943a0edeaf01",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATwAAAEQCAYAAAAta8hLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+3klEQVR4nO3dd3gU5doG8Ht7Sd8lyaY3QgIJIZGAihRBaRIEBTyiYABBkCKICtgoR4pwFFH0iCgaEDgfogI2MEjvndBCAiGEQHqvm23v90fMypKEFJLMlud3XblgZ2dnnpmd3HmnvcNjjDEQQogN4HNdACGEtBUKPEKIzaDAI4TYDAo8QojNoMAjhNgMCjxCiM2gwCOE2AwKPEKIzaDAI4TYjDYLvAsXLkAul2P16tVtNUtCCDHRpMCLi4sDj8cz/giFQnh4eOD555/HtWvX6v1caWkpRo4ciRkzZmDGjBkPXPSD+OOPP7Bw4cI63/P398e4ceOMrzMyMrBw4UKcP3++1rgLFy4Ej8drnSKbicfj1btstuLKlStYuHAhbt682SbzW7p0KbZv395i02uJ+u+3jds81gTfffcdA8C+++47duzYMbZv3z62ePFiJpPJmJubGysoKKjzc6NGjWIvvvgiMxgMTZldq5g2bRqrb7HPnj3Lrl+/bnx96tQp4/LeKz09nR07dqy1ymwWAGzBggVcl8GprVu3MgBs3759bTI/Ozs7Fhsb22LTa4n677eN2zphc0IyPDwc0dHRAIDHH38cer0eCxYswPbt2zF+/Pha4//www/NjOOWU1FRAblcft9xoqKiGj09b29veHt7P2hZhJC21JR0rGnhnTp1ymT477//zgCwZcuWmQw/deoUGzp0KHNxcWESiYRFRkayLVu21DnN+Ph4Nm7cOObi4sLkcjmLiYlhKSkpJuPGx8ezp59+mnl5eTGJRMKCgoLYK6+8wnJzc03GW7BgAQPAzpw5w0aMGMGcnZ2ZSqVisbGxDECtn9TUVMYYY35+fsa/1vv27atz3JoWVM087qbX69ny5ctZSEgIE4vFzNXVlY0dO5alp6ebjNenTx8WFhbGTp48yXr27MlkMhkLCAhgy5YtY3q9vsHvobi4mE2cOJEpFApmZ2fHBg4cyJKSkups4SUnJ7PRo0czV1dXJhaLWWhoKPv8889r1f3BBx+wDh06MKlUypycnFjnzp3ZqlWr7ltHzTratGkTmzNnDlOpVMzOzo7FxMSwrKwsVlJSwiZNmsSUSiVTKpVs3LhxrLS01GQalZWVbN68eczf35+JRCLm6enJpk6dygoLC03G8/PzY0OGDGE7d+5kUVFRTCqVspCQELZu3TrjODXb0r0/NS30pm4/ly5dYs8//zxzdHRkbm5ubPz48ayoqMg4Xl3z6tOnD2OMsfLycvbGG28wf39/JpFImIuLC+vatSvbvHlzveuzofoZY2zdunUsIiLCOM3hw4ezK1euGN9vaBv//PPPWa9evZirqyuTy+UsPDycLV++nGk0mlrru66Wa58+fYzLyBhjkydPZhKJhJ0+fdo4TK/Xs379+jE3NzeWkZFR7/JyoUUC7/PPP2cA2E8//WQctnfvXiYWi1mvXr3Yli1b2K5du9i4ceNqfYE10/Tx8WETJkxgO3fuZGvXrmVubm7Mx8fHZMP/8ssv2bJly9gvv/zCDhw4wNavX8+6dOnCQkJCTL6wmg3Wz8+PzZ07l+3evZtt376dXb9+nY0cOZIBYMeOHTP+qNVqxpjpl1xcXGys7b333jOOWxNedQXeK6+8wgCw6dOns127drE1a9YwV1dX5uPjY/JL1adPH6ZUKllwcDBbs2YN2717N5s6dSoDwNavX3/f78BgMLC+ffsyiUTClixZwuLj49mCBQtYYGBgrcC7fPmyMbw2bNjA4uPj2RtvvMH4fD5buHChcbxly5YxgUDAFixYwPbs2cN27drFVq1aZTJOXWoCz8/Pj40bN8
64zPb29qxv376sf//+7M0332Tx8fFs+fLlTCAQsBkzZpgsy8CBA5lQKGTvv/8+i4+PZx999BGzs7NjUVFRxu+l5rvx9vZmnTp1Yhs2bGB//vknGzVqFAPADhw4wBhjLCcnhy1dupQBYF988YXxO8vJyWnW9hMSEsLmz5/Pdu/ezVauXMkkEgkbP368cbxjx44xmUzGnnrqKeO8Ll++zBirDgK5XM5WrlzJ9u3bx3777Tf24YcfstWrV9e7Phuqv+a90aNHs99//51t2LCBBQYGMicnJ5acnMwYYw1u46+//jr78ssv2a5du9jevXvZJ598wtq1a2eyXDXruzGBV1lZySIjI1lgYKDxd3X+/PmMz+ez+Pj4epeVK80KvOPHjzOtVstKS0vZrl27mEqlYr1792ZardY4bmhoKIuKijIZxhhjMTExzMPDw9iSqZnmM888YzLekSNHGAC2ePHiOmsxGAxMq9WytLQ0BoDt2LHD+F7NBjt//vxan7vf8Y17v+T7HcO7N/ASExMZADZ16lST8U6cOMEAsHfeecc4rE+fPgwAO3HihMm4nTp1YgMHDqyztho7d+5kANinn35qMnzJkiW1Am/gwIHM29ubFRcXm4w7ffp0JpVKjcdcY2JiWGRk5H3nW5eawBs6dKjJ8FmzZjEA7LXXXjMZPnz4cKZQKIyvd+3axQCwFStWmIy3ZcsWBoCtXbvWOMzPz49JpVKWlpZmHFZZWckUCgWbPHmycVhjj4E1Zvu5t66pU6cyqVRqciy6vmN44eHhbPjw4fetoS711V9YWGgM17vdunWLSSQS9sILLxiHNfYYnl6vZ1qtlm3YsIEJBAKTY/CNDTzGGLt27RpzdHRkw4cPZ3/99Rfj8/nsvffea3hhOdCsy1IeeeQRiEQiODg4YNCgQXBxccGOHTsgFFYfErx+/TquXr2KF198EQCg0+mMP0899RQyMzORlJRkMs2acWv06NEDfn5+2Ldvn3FYTk4OpkyZAh8fHwiFQohEIvj5+QEAEhMTa9U5YsSI5ixes9TUefdZXgDo3r07OnbsiD179pgMV6lU6N69u8mwiIgIpKWlNWo+966vF154weS1Wq3Gnj178Mwzz0Aul9f6DtRqNY4fP26sMSEhAVOnTsWff/6JkpKSxi3032JiYkxed+zYEQAwZMiQWsMLCgpQVlYGANi7dy+A2uts1KhRsLOzq7XOIiMj4evra3wtlUrRoUOHBtdZjaZuP08//bTJ64iICKjVauTk5DQ4r+7du2Pnzp2YN28e9u/fj8rKykbVWJ9jx46hsrKy1rry8fFBv379aq2r+pw7dw5PP/00lEolBAIBRCIRXnrpJej1eiQnJzertvbt2+Prr7/G9u3bERMTg169epntWeJmBd6GDRtw6tQp7N27F5MnT0ZiYiJGjx5tfD87OxsA8Oabb0IkEpn8TJ06FQCQl5dnMk2VSlVrPiqVCvn5+QAAg8GAAQMG4Oeff8acOXOwZ88enDx50vhLW9cG5eHh0ZzFa5aaOuuap6enp/H9GkqlstZ4EomkwV+M/Px8CIXCWp+/d/3l5+dDp9Nh9erVtb6Dp556CsA/38Hbb7+Njz76CMePH8fgwYOhVCrxxBNP4PTp0w0sdTWFQmHyWiwW33e4Wq02WRZXV1eT8Xg8nsl3X6O56wxo3vZz7/wkEkm9497rs88+w9y5c7F9+3b07dsXCoUCw4cPv+/lW/fT1O2rLrdu3UKvXr1w584dfPrppzh06BBOnTqFL774AkDjlqs+Q4YMgbu7O9RqNWbPng2BQNDsabWmZp2l7dixo/Esbd++faHX6/HNN9/gxx9/xMiRI9GuXTsA1b9Izz77bJ3TCAkJMXmdlZVVa5ysrCy0b98eAHDp0iUkJCQgLi4OsbGxxnGuX79eb51teZ1czS9HZmZmrbO3GRkZxnXSEvPR6XTIz883+YW8d/25uLhAIBBg7NixmDZtWp3TCggIAAAIhULMnj0bs2
fPRlFREf766y+88847GDhwINLT0xs8u/2gy5Kbm2sSeowxZGVloVu3bi02r+ZsPw/Czs4OixYtwqJFi5CdnW1s7Q0
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"3\", coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "b64d04db-1c3f-4538-9d05-8f7d62c7c046",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>105720</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1961-12-04</td>\n",
" <td>NaN</td>\n",
" <td>91159</td>\n",
" <td>2021-03-02 15:35:40.452065+01:00</td>\n",
" <td>2023-11-09 01:31:07.539604+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>19715.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-06 16:57:19</td>\n",
" <td>7500.0</td>\n",
" <td>2297716</td>\n",
" <td>10.152196</td>\n",
" <td>14917</td>\n",
" <td>-39771.165147</td>\n",
" <td>27.514811</td>\n",
" <td>2.710232</td>\n",
" <td>2.332686e+07</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>847793</td>\n",
" <td>2016-01-01 10:23:36+01:00</td>\n",
" <td>2023-11-06 17:12:00</td>\n",
" <td>13090</td>\n",
" <td>fr</td>\n",
" <td>61.0</td>\n",
" <td>1512</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"105720 1 NaN NaN 1961-12-04 NaN 91159 \n",
"\n",
" created_at updated_at \\\n",
"105720 2021-03-02 15:35:40.452065+01:00 2023-11-09 01:31:07.539604+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"105720 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"105720 False NaN False 19715.0 NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
"105720 NaN NaN False 2023-11-06 16:57:19 7500.0 \n",
"\n",
" ticket_sum average_price fidelity average_purchase_delay \\\n",
"105720 2297716 10.152196 14917 -39771.165147 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"105720 27.514811 2.710232 2.332686e+07 \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"105720 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date \\\n",
"105720 847793 2016-01-01 10:23:36+01:00 2023-11-06 17:12:00 \n",
"\n",
" zipcode country age tenant_id \n",
"105720 13090 fr 61.0 1512 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = load_dataset_2('3', 'customersplus')\n",
"df[df['id'] == 1]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "1d817bee-3ded-4066-9f91-6cf095591b0e",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEQCAYAAAD7zhIuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAUklEQVR4nO3dd3QUVRsG8Gd7S++9h4SSQGjSIdKRpoCKgIAgSFNsiChNQQTxAwWlC9IUC0VEQgldQAIEAqR30nvbZLPtfn9gVpb0ZDezm9zfOTmcnZ3MvDs7eZhy514WIYSAoiiKYWymC6AoigJoGFEUZSBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBaLIwiIiIgFouxZcuWllolRVFGpFFhtG/fPrBYLM0Pl8uFo6MjXn31VcTFxdX6e6WlpZg4cSIWLVqERYsWNbvo5vjrr7+watWqGt/z8PDAjBkzNK8zMjKwatUq3Lt3r9q8q1atAovF0k+RTcRisWr9bG1FZGQkVq1aheTk5BZZ3xdffIHjx4/rbHm6qL+ufdygkUbYu3cvAUD27t1Lbty4QS5evEjWrFlDRCIRsbOzIwUFBTX+3qRJk8iUKVOIWq1uzOr0YsGCBaS2j3337l0SHx+veR0WFqb5vM96/PgxuXHjhr7KbBIAZOXKlUyXwahff/2VACAXL15skfVJJBIyffp0nS1PF/XXtY8bMm5TAqxTp07o3r07AGDQoEFQqVRYuXIljh8/jpkzZ1ab/5dffmliVOpOeXk5xGJxnfMEBQU1eHkuLi5wcXFpblkURVVpTHJVHRmFhYVpTT916hQBQNatW6c1PSwsjIwZM4ZYWloSgUBAunTpQo4cOVLjMs+ePUtmzJhBLC0tiVgsJqNHjyYJCQla8549e5aMHTuWODs7E4FAQLy9vcmcOXNIbm6u1nwrV64kAMidO3fIhAkTiIWFBXFwcCDTp08nAKr9JCUlEUIIcXd31/wvd/HixRrnrTryqFrH01QqFVm/fj3x8/MjfD6f2NrakmnTppHHjx9rzTdw4EDSsWNHcuvWLdKvXz8iEomIp6cnWbduHVGpVPV+D8XFxWT27NnEysqKSCQSMnz4cBITE1PjkVFsbCyZPHkysbW1JXw+n/j7+5OtW7dWq/vzzz8n7dq1I0KhkJibm5OAgACyefPmOuuo2kaHDh0iS5YsIQ4ODkQikZDRo0eTrKwsUlJSQt58801ibW1NrK2tyYwZM0hpaanWMioqKsjSpUuJh4cH4fF4xMnJicyfP58UFhZqzefu7k5eeOEFcvr0aRIUFESEQiHx8/Mje/bs0cxTtS89+1N1ZNvY/efhw4fk1VdfJWZmZsTOzo7MnDmTFBUVaearaV0DBw4khBAilUrJ+++/Tzw8PIhAICCWlpakW7du5PDhw7Vuz/rqJ4SQPXv2kMDAQM0yx48fTyIjIzXv17ePb926lfTv35/Y2toSsVhMOnXqRNavX0/kcnm17V3TEd/AgQM1n5EQQubOnUsEAgG5ffu2ZppKpSLPP/88sbOzIxkZGbV+3mfpJIy2bt1KAJDff/9dM+3ChQuEz+eT/v37kyNHjpCQkBAyY8aMahu3apmurq7kjTfeIKdPnyY7d+4kdnZ2xNXVVWun3LZtG1m3bh35448/yOXLl8mPP/5IOnfuTPz8/LQ2ZtXO5O7uTj766CNy7tw5cvz4cRIfH08mTpxIAJAbN25ofmQyGSFE+wsoLi7W1Pbpp59q5q0KlprCaM6cOQQAWbhwIQkJCSHbt28ntra2xNXVVWuHHzhwILG2tia+vr5k+/bt5Ny5c2T+/PkEAPnxxx/r/A7UajUJDg4mAoGArF27lpw9e5asXLmSeHl5VQujR48eaYJl//795OzZs+T9998nbDabrFq1SjPfunXrCIfDIStXriShoaEkJCSEbN68WWuemlSFkbu7O5kxY4bmM5uYmJ
Dg4GAydOhQ8sEHH5CzZ8+S9evXEw6HQxYtWqT1WYYPH064XC5Zvnw5OXv2LNm4cSORSCQkKChI871UfTcuLi6kQ4cOZP/+/eTMmTNk0qRJBAC5fPkyIYSQnJwc8sUXXxAA5LvvvtN8Zzk5OU3af/z8/MiKFSvIuXPnyP/+9z8iEAjIzJkzNfPduHGDiEQiMmrUKM26Hj16RAh58kcqFovJ//73P3Lx4kXy559/ki+//JJs2bKl1u1ZX/1V702ePJmcOnWK7N+/n3h5eRFzc3MSGxtLCCH17uPvvvsu2bZtGwkJCSEXLlwgmzZtIjY2Nlqfq2p7NySMKioqSJcuXYiXl5fmb3XFihWEzWaTs2fP1vpZa9KkMLp58yZRKBSktLSUhISEEAcHBzJgwACiUCg08/r7+5OgoCCtaYQQMnr0aOLo6Kg5Aqha5osvvqg1399//00AkDVr1tRYi1qtJgqFgqSkpBAA5MSJE5r3qnamFStWVPu9us6nn/0C6rpm9GwYRUVFEQBk/vz5WvP9888/BABZtmyZZtrAgQMJAPLPP/9ozduhQwcyfPjwGmurcvr0aQKAfPPNN1rT165dWy2Mhg8fTlxcXEhxcbHWvAsXLiRCoVBzjW/06NGkS5cuda63JlVhNGbMGK3pixcvJgDI22+/rTV9/PjxxMrKSvM6JCSEACAbNmzQmu/IkSMEANm5c6dmmru7OxEKhSQlJUUzraKiglhZWZG5c+dqpjX0mktD9p9n65o/fz4RCoVa1z5ru2bUqVMnMn78+DprqElt9RcWFmqC72mpqalEIBCQ1157TTOtodeMVCoVUSgUZP/+/YTD4Whd821oGBFCSFxcHDEzMyPjx48n58+fJ2w2m3z66af1f9hnNOnWfq9evcDj8WBqaooRI0bA0tISJ06cAJf75BJUfHw8oqOjMWXKFACAUqnU/IwaNQqZmZmIiYnRWmbVvFX69OkDd3d3XLx4UTMtJycHb731FlxdXcHlcsHj8eDu7g4AiIqKqlbnhAkTmvLxmqSqzqfvxgFAz5490b59e4SGhmpNd3BwQM+ePbWmBQYGIiUlpUHreXZ7vfbaa1qvZTIZQkND8eKLL0IsFlf7DmQyGW7evKmp8f79+5g/fz7OnDmDkpKShn3of40ePVrrdfv27QEAL7zwQrXpBQUFKCsrAwBcuHABQPVtNmnSJEgkkmrbrEuXLnBzc9O8FgqFaNeuXb3brEpj95+xY8dqvQ4MDIRMJkNOTk696+rZsydOnz6NpUuX4tKlS6ioqGhQjbW5ceMGKioqqm0rV1dXPP/889W2VW3Cw8MxduxYWFtbg8PhgMfj4fXXX4dKpUJsbGyTavPx8cGuXbtw/PhxjB49Gv3792/S3bwmhdH+/fsRFhaGCxcuYO7cuYiKisLkyZM172dnZwMAPvjgA/B4PK2f+fPnAwDy8vK0lung4FBtPQ4ODsjPzwcAqNVqDBs2DEePHsWSJUsQGhqKW7duaf6gavqyHR0dm/LxmqSqzprW6eTkpHm/irW1dbX5BAJBvTttfn4+uFxutd9/dvvl5+dDqVRiy5Yt1b6DUaNGAfjvO/j444+xceNG3Lx5EyNHjoS1tTUGDx6M27dv1/Opn7CystJ6zefz65wuk8m0Poutra3WfCwWS+u7r9LUbQY0bf95dn0CgaDWeZ/17bff4qOPPsLx48cRHBwMKysrjB8/vs4mMHVp7P5Vk9TUVPTv3x/p6en45ptvcPXqVYSFheG7774D0LDPVZsXXngB9vb2kMlkeO+998DhcBq9jCbdTWvfvr3mblpwcDBUKhV2796N3377DRMnToSNjQ2AJzv5Sy+9VOMy/Pz8tF5nZWVVmycrKws+Pj4AgIcPH+L+/fvYt28fpk+frpknPj6+1jpbsh1Q1Y6bmZlZ7S5bRkaGZpvoYj1KpRL5+flafyzPbj9LS0twOBxMmzYNCxYsqHFZnp6eAAAul4v33nsP7733HoqKinD+/H
ksW7YMw4cPx+PHj+u9C9ncz5Kbm6sVSIQQZGVloUePHjpbV1P2n+aQSCRYvXo1Vq9ejezsbM1R0pgxYxAdHd3o5T2
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"4\", coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "4cc07982-1070-439b-a579-fd3f351778b3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>300754</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2020-09-25 19:09:07.669208+02:00</td>\n",
" <td>2021-11-30 02:07:28.120188+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-07 16:33:09</td>\n",
" <td>360.0</td>\n",
" <td>1237224</td>\n",
" <td>6.056248</td>\n",
" <td>236850</td>\n",
" <td>0.015528</td>\n",
" <td>13.493612</td>\n",
" <td>2.228048</td>\n",
" <td>7492935.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>555295</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1342</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"300754 2 NaN NaN NaN NaN 2 \n",
"\n",
" created_at updated_at \\\n",
"300754 2020-09-25 19:09:07.669208+02:00 2021-11-30 02:07:28.120188+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"300754 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"300754 False NaN False NaN NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
"300754 NaN NaN False 2023-11-07 16:33:09 360.0 \n",
"\n",
" ticket_sum average_price fidelity average_purchase_delay \\\n",
"300754 1237224 6.056248 236850 0.015528 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"300754 13.493612 2.228048 7492935.0 \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"300754 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
"300754 555295 1901-01-01 00:09:21+00:09 NaN NaN \n",
"\n",
" country age tenant_id \n",
"300754 NaN NaN 1342 "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = load_dataset_2('4', 'customersplus')\n",
"df[df['id'] == 2]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "f74a9e62-a0f7-41cf-9834-78a99204547c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced_1.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAEQCAYAAADYlUP7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBNklEQVR4nO3dd3gUVdsG8Ht7S9s00isQWkIiSJUSBAIYmoAUQToKCKIoIK+0V3xRPgsISpEAgqioIAIKhI5IgFASSigJpJFOeja72Xa+P2IWlvSQZHZ2z++6csHOnp15ZndyZ+bM2RkOIYSAoiiKpbhMF0BRFPU8aIhRFMVqNMQoimI1GmIURbEaDTGKoliNhhhFUaxGQ4yiKFajIUZRFKvREKMoitWaLcRu3LgBqVSKDRs2NNciKYqyAPUKsZ07d4LD4Rh++Hw+XF1dMW7cOMTHx1f7uuLiYowePRrz5s3DvHnznrvo5/HXX39h5cqVVT7n4+ODKVOmGB6np6dj5cqViImJqdR25cqV4HA4TVNkA3E4nGrXzVLExcVh5cqVSEpKapbl/e9//8OBAwcabX6NUX9N27hZIvWwY8cOAoDs2LGDREVFkdOnT5PVq1cTiURCnJ2dSV5eXpWvGzNmDHn99deJXq+vz+KaxNy5c0l1q33t2jWSkJBgeBwdHW1Y32elpqaSqKiopiqzQQCQFStWMF0Go3799VcCgJw+fbpZlieTycjkyZMbbX6NUX9N27g54jck+Dp06IDOnTsDAPr27QudTocVK1bgwIEDmDp1aqX2v/zySwMjtvGUlpZCKpXW2CYkJKTO8/Pw8ICHh8fzlkVR1POqT+JV7IlFR0cbTf/zzz8JALJmzRqj6dHR0WTo0KFELpcTkUhEgoODyd69e6ucZ2RkJJkyZQqRy+VEKpWS8PBw8uDBA6O2kZGRZNiwYcTd3Z2IRCLi7+9PZs2aRXJycozarVixggAgV69eJaNGjSJ2dnbExcWFTJ48mQCo9JOYmEgIIcTb29vwV/X06dNVtq3Y06lYxtN0Oh357LPPSEBAABEKhcTJyYlMmjSJpKamGrXr06cPad++Pbl8+TJ56aWXiEQiIb6+vmTNmjVEp9PV+jkUFhaSGTNmEHt7eyKTyUhYWBi5d+9elXti9+/fJ+PHjydOTk5EKBSSNm3akI0bN1aq++OPPyatW7cmYrGY2NraksDAQLJu3boa66h4j/bs2UMWLVpEXFxciEwmI+Hh4SQzM5MUFRWRmTNnEgcHB+Lg4ECmTJlCiouLjeahVCrJkiVLiI+PDxEIBMTNzY3MmTOH5OfnG7Xz9vYmr7zyCjly5AgJCQkhYrGYBAQEkIiICEObim3p2Z+KPen6bj+3bt0i48aNIzY2NsTZ2ZlMnTqVFBQUGNpVtaw+ffoQQghRKBRk4cKFxMfHh4hEIiKXy0mnTp3Ijz/+WO37WVv9hBASERFBgoKCDPMcMWIEiYuLMzxf2za+ceNG0qtXL+Lk5ESkUinp0KED+eyzz4hara70fle1h9mnTx/DOhJCyJtvvklEIhG5cuWKYZpOpyP9+vUjzs7OJD09vdr1bSyNEmIbN24kAMi+ffsM006dOkWEQiHp1asX2bt3Lzl69CiZMmVKpQ+lYp6enp5k2rRp5MiRI2Tr1q3E2dmZeHp6Gm3MmzZtImvWrCEHDx4kZ8+eJd9//z3p2LEjCQgIMPoQKjZCb29vsnjxYnL8+HFy4MABkpCQQEaPHk0AkKioKMOPSqUihBh/cIWFhYbaPvroI0PbikCqKsRmzZpFAJC3336bHD16lGzevJk4OTkRT09Po1+UPn36EAcHB9KqVSuyefNmcvz4cTJnzhwCgHz//fc1fgZ6vZ6EhoYSkUhEPvnkExIZGUlWrFhB/Pz8KoXY7du3DYG0a9cuEhkZSRYuXEi4XC5ZuXKlod2aNWsIj8cjK1asICdPniRHjx4l69atM2pTlYoQ8/b2JlOmTD
Gss5WVFQkNDSUDBgwg77//PomMjCSfffYZ4fF4ZN68eUbrEhYWRvh8Plm2bBmJjIwkn3/+OZHJZCQkJMTwuVR8Nh4eHqRdu3Zk165d5NixY2TMmDEEADl79iwhhJDs7Gzyv//9jwAg33zzjeEzy87ObtD2ExAQQJYvX06OHz9OvvzySyISicjUqVMN7aKioohEIiFDhgwxLOv27duEkPJfbqlUSr788kty+vRpcvjwYfLpp5+SDRs2VPt+1lZ/xXPjx48nf/75J9m1axfx8/Mjtra25P79+4QQUus2/u6775JNmzaRo0ePklOnTpGvvvqKODo6Gq1XxftdlxBTKpUkODiY+Pn5GX5Xly9fTrhcLomMjKx2XRtTg0Ls4sWLRKPRkOLiYnL06FHi4uJCevfuTTQajaFtmzZtSEhIiNE0QggJDw8nrq6uhj2OinmOHDnSqN0///xDAJDVq1dXWYterycajYYkJycTAOSPP/4wPFexES5fvrzS62rqL3j2g6upT+zZELtz5w4BQObMmWPU7tKlSwQAWbp0qWFanz59CABy6dIlo7bt2rUjYWFhVdZW4ciRIwQAWb9+vdH0Tz75pFKIhYWFEQ8PD1JYWGjU9u233yZisdjQhxkeHk6Cg4NrXG5VKkJs6NChRtMXLFhAAJD58+cbTR8xYgSxt7c3PD569CgBQNauXWvUbu/evQQA2bp1q2Gat7c3EYvFJDk52TBNqVQSe3t78uabbxqm1bVPqS7bz7N1zZkzh4jFYqO+3er6xDp06EBGjBhRYw1Vqa7+/Px8Q2A+LSUlhYhEIjJhwgTDtLr2iel0OqLRaMiuXbsIj8cz6tOua4gRQkh8fDyxsbEhI0aMICdOnCBcLpd89NFHta9sI2nQEItu3bpBIBDA2toagwYNglwuxx9//AE+v7yLLSEhAXfv3sXrr78OANBqtYafIUOGICMjA/fu3TOaZ0XbCj169IC3tzdOnz5tmJadnY233noLnp6e4PP5EAgE8Pb2BgDcuXOnUp2jRo1qyOo1SEWdT5/dBIAuXbqgbdu2OHnypNF0FxcXdOnSxWhaUFAQkpOT67ScZ9+vCRMmGD1WqVQ4efIkRo4cCalUWukzUKlUuHjxoqHG2NhYzJkzB8eOHUNRUVHdVvpf4eHhRo/btm0LAHjllVcqTc/Ly0NJSQkA4NSpUwAqv2djxoyBTCar9J4FBwfDy8vL8FgsFqN169a1vmcV6rv9DBs2zOhxUFAQVCoVsrOza11Wly5dcOTIESxZsgRnzpyBUqmsU43ViYqKglKprPReeXp6ol+/fpXeq+pcv34dw4YNg4ODA3g8HgQCAd544w3odDrcv3+/QbW1bNkS3333HQ4cOIDw8HD06tWrWc+ONijEdu3ahejoaJw6dQpvvvkm7ty5g/Hjxxuez8rKAgC8//77EAgERj9z5swBADx+/Nhoni4uLpWW4+LigtzcXACAXq/HwIEDsX//fixatAgnT57E5cuXDb+IVW0krq6uDVm9Bqmos6plurm5GZ6v4ODgUKmdSCSqdWPPzc0Fn8+v9Ppn37/c3FxotVps2LCh0mcwZMgQAE8+gw8//BCff/45Ll68iMGDB8PBwQEvv/wyrly5Ustal7O3tzd6LBQKa5yuUqmM1sXJycmoHYfDMfrsKzT0PQMatv08uzyRSFRt22d9/fXXWLx4MQ4cOIDQ0FDY29tjxIgRNQ5Fqkl9t6+qpKSkoFevXkhLS8P69evx999/Izo6Gt988w2Auq1XdV555RW0aNECKpUK7733Hng8XoPnVV8NOjvZtm1bw9nJ0NBQ6HQ6bNu2Db/99htGjx4NR0dHAOW/HK+++mqV8wgICDB6nJmZWalNZmYmWrZsCQC4desWYmNjsXPnTkyePNnQJiEhodo6m3McV8UGn5GRUemsZXp6uuE9aYzlaLVa5ObmGv2SPfv+yeVy8Hg8TJo0CXPnzq1yXr6+vgAAPp+P9957D++99x4KCg
pw4sQJLF26FGFhYUhNTa31rO7zrktOTo5RkBFCkJmZiRdffLHRltWQ7ed5yGQyrFq1CqtWrUJWVpZhr2zo0KG4e/d
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"101\", coupure = 2)"
]
},
{
"cell_type": "markdown",
"id": "dbebfa92-310a-417b-a7fa-36ac3593db06",
"metadata": {},
"source": [
"## Evolution des commandes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "06137694-7f50-47ba-8749-68471ececc1e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1235/3643128924.py:11: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
"/tmp/ipykernel_1235/3643128924.py:19: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = ['sent_at'], date_parser=custom_date_parser)\n"
]
}
],
"source": [
"# Importation - Chargement des données temporaires\n",
"def custom_date_parser(date_string):\n",
" return pd.to_datetime(date_string, utc = True, format = 'ISO8601')\n",
"\n",
"# Achat\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Input/Company_1/products_purchased_reduced.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
" \n",
"# Emails\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Input/Company_1/campaigns_information.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = ['sent_at'], date_parser=custom_date_parser)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e6b962d4-1a30-4133-ac0f-359f7afef42c",
"metadata": {},
"outputs": [],
"source": [
"# Mois du premier achat\n",
"purchase_min = purchases.groupby(['customer_id'])['purchase_date'].min().reset_index()\n",
"purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)\n",
"purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])\n",
"purchase_min['first_purchase_month'] = pd.to_datetime(purchase_min['first_purchase_event'].dt.strftime('%Y-%m'))\n",
"\n",
"# Mois du premier mails\n",
"first_mail_received = campaigns.groupby('customer_id')['sent_at'].min().reset_index()\n",
"first_mail_received.rename(columns = {'sent_at' : 'first_email_reception'}, inplace = True)\n",
"first_mail_received['first_email_reception'] = pd.to_datetime(first_mail_received['first_email_reception'])\n",
"first_mail_received['first_email_month'] = pd.to_datetime(first_mail_received['first_email_reception'].dt.strftime('%Y-%m'))\n",
"\n",
"# Fusion \n",
"known_customer = pd.merge(purchase_min[['customer_id', 'first_purchase_month']], \n",
" first_mail_received[['customer_id', 'first_email_month']], on = 'customer_id', how = 'outer')\n",
"\n",
"# Mois à partir duquel le client est considere comme connu\n",
"known_customer['known_date'] = pd.to_datetime(known_customer[['first_email_month', 'first_purchase_month']].min(axis = 1), utc = True, format = 'ISO8601')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9c56e5ac-cbf4-4343-80ba-be2ab8b60eab",
"metadata": {},
"outputs": [],
"source": [
"# Nombre de commande par mois\n",
"purchases_count = pd.merge(purchases[['customer_id', 'purchase_id', 'purchase_date']].drop_duplicates(), known_customer[['customer_id', 'known_date']], on = ['customer_id'], how = 'inner')\n",
"purchases_count['is_customer_known'] = purchases_count['purchase_date'] > purchases_count['known_date'] + pd.DateOffset(months=1)\n",
"purchases_count['purchase_date_month'] = pd.to_datetime(purchases_count['purchase_date'].dt.strftime('%Y-%m'))\n",
"purchases_count = purchases_count[purchases_count['customer_id'] != 1]\n",
"\n",
"# Nombre de commande par mois par type de client\n",
"nb_purchases_graph = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['purchase_id'].count().reset_index()\n",
"nb_purchases_graph.rename(columns = {'purchase_id' : 'nb_purchases'}, inplace = True)\n",
"\n",
"nb_purchases_graph_2 = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['customer_id'].nunique().reset_index()\n",
"nb_purchases_graph_2.rename(columns = {'customer_id' : 'nb_new_customer'}, inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8c1aed44-03d3-49f9-b96c-b06a0df03dde",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHFCAYAAAAT5Oa6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABqOUlEQVR4nO3dd1gUV/s38O9KWYqwUqQpiLEQFLAmisZgAYGIPbFgsGBJrCFqjESN6GOJJrZHo1Fjiw2TxxJbUKOiUSyIYiVGDbYIYhQXQTrn/cOX+bnugizS9/u5rr10z9w7c5/dAe49c2ZGJoQQICIiItJh1co7ASIiIqLyxoKIiIiIdB4LIiIiItJ5LIiIiIhI57EgIiIiIp3HgoiIiIh0HgsiIiIi0nksiIiIiEjnsSAiIiKtrVq1Cqampjhz5kx5p0JUIlgQVXDr16+HTCaDkZER7ty5o7a8ffv2cHNzK4fMgMGDB6N69erlsu3XkclkCAsLk57nv4+lJSwsDDKZDP/++2+JrXPLli1YvHhxia2PSk7+511S8vfP27dvS22DBw9G+/btS2wbJSk2Nhbjx49HeHg4WrVqVWLrTUhIwNSpU+Hp6Qlra2uYm5ujRYsWWLVqFXJzc9XiU1NTERISAgcHBxgZGaFp06YIDw9XicnNzcXChQvh5+eH2rVrw8TEBK6urpg8eTKePn2qEpuWloZ+/frBxcUFZmZmMDU1RePGjTFr1iykpaWVWD/zOTs7QyaTFfg5//TTT5DJZJDJZIiMjJTaX/e7t3r16hg8eLBa+99//40xY8agYcOGMDY2homJCRo3boypU6fin3/+UVl//nY1PfLdvn1banv59+3LgoOD1V5XUemXdwJUNJmZmZg6dSo2btxY3qlQGdmyZQuuXLmCkJCQ8k6FSJKSkoKPPvoICxcuRNeuXUt03TExMfjpp58wcOBATJs2DQYGBvjtt98wcuRInD59GmvXrlWJ79WrF6Kjo/HNN9+gYcOG2LJlC/r374+8vDwEBgYCANLT0xEWFob+/ftj2LBhsLa2xvnz5zFr1izs2bMH586dg7GxMQAgOzsbQgiMHz8edevWRbVq1XD8+HHMnDkTkZGR+P3330u0vwBgZmaG48eP49atW6hXr57KsrVr18Lc3BwpKSlvvJ29e/eiX79+sLa2xpgxY9CsWTPIZDJcvnwZa9euxb59+3DhwgUp3tjYGEeOHClyH9avX4+vv/4a1ar93zhLamoqfvnllxLrQ2ljQVRJ+Pn5YcuWLZg4cSKaNGlS3um8MSEEMjIypF9ERKUhPT0dRkZGleLbaWVhbm6OGzdulMq627Zti1u3bsHAwEBq8/HxQVZWFr7//nvMmDEDjo6OAID9+/fj0KFDUhEEAB06dMCdO3fwxRdfoG/fvtDT04OxsTHi4+NhZWUlrbN9+/ZwcnLCRx99hO3bt+Pjjz8GANSoUQPbtm1Tycnb2xuZmZmYP38+/v77b7z11lsl2uf33ntPKkpmz54ttd+6dQvHjx/HsGHDsHr16jfaRnx8PPr164eGDRvi6NGjUCgU0rKOHTti3Lhx2Llzp8prqlWrhtatWxdp/X379sWPP/6Iw4cPw8fHR2rftm0bcnNz0aNHD2zatOmN+lAWeMiskpg0aRKsrKzw5ZdfvjY2IyMDoaGhqFu3LgwNDVGrVi2MHj1abXjY2dkZAQEB2Lt3L5o1awZjY2O4urpi7969AF4M47u6usLU1BTvvvsuzp07p3F7V69eRadOnWBqaoqaNWtizJgxeP78uUqMTCbDmDFj8MMPP8DV1RVyuRwbNmwAANy4cQOBgYGwsbGBXC6Hq6srvv/++yK9LykpKRg+fDisrKxQvXp1+Pn54a+//irSa7dt24bOnTvD3t5e6vvkyZM1Do2fOXMGXbt2hZWVFYyMjFCvXj2NIzcPHz5E//79oVAoYGtri+DgYCiVSpWY77//Hu+//z5sbGxgamoKd3
d3zJ8/H9nZ2VJM+/btsW/fPty5c0fjUPWKFSvQpEkTVK9eHWZmZnj77bfx1VdfvbbPM2bMQKtWrWBpaQlzc3M0b94ca9aswav3eC5oCNzZ2VnjUPyrMjMzMXPmTLi6usLIyAhWVlbo0KEDoqKipJii7qdFzSX/sNPBgwcRHByMmjVrwsTEBJmZmXj06BFGjBgBR0dHyOVy1KxZE23bti3SN/59+/ahadOmkMvlqFu3Lr777juNcUIILF++HE2bNoWxsTEsLCzw4Ycf4u+//37tNoqqqJ9f/s92REQEmjdvDmNjY7z99ttqIyz579nRo0cxcuRIWFtbw8rKCr169cKDBw/Utr9t2zZ4enrC1NQU1atXh6+vr8qoQr5z586hW7dusLS0hJGREZo1a4aff/75tf2zsLBQKYbyvfvuuwCA+/fvS207d+5E9erV8dFHH6nEDhkyBA8ePJDmNunp6akUQ6+u8969e6/Nq2bNmgAAff2SH0OoVq0aBg4ciA0bNiAvL09qX7t2LRwdHeHt7f3G21i4cCHS0tKwfPlylWIon0wmQ69evYq9fhcXF7Rp00Zt/1q7di169eqlcZsVEUeIKgkzMzNMnToVn332GY4cOYKOHTtqjBNCoEePHjh8+DBCQ0PRrl07XLp0CdOnT8epU6dw6tQpyOVyKf7ixYsIDQ3FlClToFAoMGPGDPTq1QuhoaE4fPgw5syZA5lMhi+//BIBAQGIj49XGdXJzs7GBx98gE8++QSTJ09GVFQUZs2ahTt37mDPnj0que3atQt//PEHvv76a9jZ2cHGxgbXrl1DmzZt4OTkhAULFsDOzg4HDhzAuHHj8O+//2L69OkFvif5fY2KisLXX3+Nd955BydPnoS/v79a7ODBg9X+kN+4cQMffPABQkJCYGpqij///BPz5s3D2bNnVYaKDxw4gK5du8LV1RULFy6Ek5MTbt++jYMHD6ptp3fv3ujbty+GDh2Ky5cvIzQ0FABUflHcunULgYGBUiFw8eJFzJ49G3/++acUt3z5cowYMQK3bt1S++YWHh6OUaNGYezYsfjuu+9QrVo13Lx5E9euXSvwvcp3+/ZtfPLJJ3BycgIAnD59GmPHjsU///yDr7/++rWvL4qcnBz4+/vjjz/+QEhICDp27IicnBycPn0ad+/eRZs2bbTeT7URHByMLl26YOPGjUhLS4OBgQGCgoJw/vx5zJ49Gw0bNsTTp09x/vx5PH78uNB1HT58GN27d4enpyfCw8ORm5uL+fPn4+HDh2qxn3zyCdavX49x48Zh3rx5ePLkCWbOnIk2bdrg4sWLsLW11aof69evV2vT5vO7ePEiJkyYgMmTJ8PW1hY//vgjhg4divr16+P9999XiR02bBi6dOmCLVu24N69e/jiiy/w8ccfq/wczJkzB1OnTsWQIUMwdepUZGVl4dtvv0W7du1w9uxZNGrUCABw9OhR+Pn5oVWrVvjhhx+gUCgQHh6Ovn374vnz50UqqF915MgR6Ovro2HDhlLblStX4OrqqlakeHh4SMvbtGlT6DoBoHHjxmrLhBDIzc3F8+fPERUVhQULFqB///7S+17SgoODMXfuXBw4cAD+/v7Izc3Fhg0bMHToUJVDUMV18OBB2NraFnnEJ19OTo5aW7Vq1TTmNHToUIwePRrJycmwsLDA9evXpb8H27dvL3buZUpQhbZu3ToBQERHR4vMzEzx1ltviZYtW4q8vDwhhBBeXl6icePGUnxERIQAIObPn6+ynm3btgkAYtWqVVJbnTp1hLGxsbh//77UFhsbKwAIe3t7kZaWJrXv2rVLABC7d++W2gYNGiQAiCVLlqhsa/bs2QKAOHHihNQGQCgUCvHkyROVWF9fX1G7dm2hVCpV2seMGSOMjIzU4l/222+/Fbr96dOnF/jaV+Xl5Yns7Gxx7NgxAUBcvHhRWlavXj1Rr149kZ6eXuDrp0+frvF9HzVqlDAyMpI+r1fl5uaK7Oxs8dNPPw
k9PT2V/nbp0kXUqVNH7TVjxowRNWrUKHLfCpK/7ZkzZworKyuVHAt6/+rUqSMGDRpU6Hp/+uknAUCsXr26wBht9tO
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Graphique en nombre de commande\n",
"purchases_graph = nb_purchases_graph\n",
"\n",
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,3,1)]\n",
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==False]\n",
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==True]\n",
"\n",
"\n",
"# Création du barplot\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_0[\"nb_purchases\"], width=12, label = \"Nouveau client\")\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_1[\"nb_purchases\"], \n",
" bottom = purchases_graph_used_0[\"nb_purchases\"], width=12, label = \"Ancien client\")\n",
"\n",
"\n",
"# commande pr afficher slt\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Mois')\n",
"plt.ylabel(\"Nombre d'achats\")\n",
"plt.title(\"Nombre d'achats - MUCEM\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d312276c-4c46-4d29-b6d6-ed110f59890d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHGCAYAAADg0eryAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5XElEQVR4nO3dd1gU1/s28Hul1xWkK82GDexRNAp2VLAmdgR7YotRYzTGiIk91mBi1Ngb6jdqrCgWMCooqNhjjMEOYqQp0jnvH77Mz6XJwi6g3J/r2kv2zNmZ58ye3X08M2dGJoQQICIiIqIKp1JZB0BEREREZYOJIBEREVEFxUSQiIiIqIJiIkhERERUQTERJCIiIqqgmAgSERERVVBMBImIiIgqKCaCRERERBUUE0EiIiKiCoqJIBG9t7755htYWlri3r17ZR0KEf1/x44dg5aWFvbv31/WoVARlCgR3LRpE2QyGXR1dfHgwYM8y93d3dGgQYOSbKLYfH19YWhoWCbbfheZTAY/P79S3aa7uzvc3d1LPY4jR46UelvV7ZdffsGmTZvKOowPnp+fH2QyWYHLjx49ilWrVuHQoUOoUaNGKUZWuKdPn8LPzw+RkZF5lr2rTQVR5XfprVu34Ofnh/v376tkfeVN7u+14OBgyGSyYrfX398fNWvWhLa2NmQyGRISEgAA3377Lezs7KCpqYnKlSuXOO7CuLu7w9fXV63bUJXHjx9jyJAhWLlyJXr16lWsddy/fx8ymYzfs8WQk5cpQyUjgmlpafj2229VsSoqRaGhoRg5cqRat3HkyBHMmTNHrdsobUwEy96jR48wbNgw7Nq1C82bNy/rcBQ8ffoUc+bMyTcRHDlyJEJDQ0s/qLfcunULc+bM+WATQVWKjIzExIkT0a5dO5w6dQqhoaEwMjLCH3/8gXnz5mHo0KEICQnBiRMnyjrUciEzMxP9+/fH6NGjMXbs2LIOh4pIUxUr8fDwwI4dOzB16lQ0bNhQFassU0IIpKamQk9Pr6xDUauWLVuWdQhExWJra4uYmJiyDkNp1apVQ7Vq1co6jPdaRkYGZDIZNDVV8vNVqJs3bwIARo0ahY8++kgqv3HjBgBg4sSJsLCwKHQdKSkpH/xvSQ5NTU2cO3eurMNQudevX0NfX7+sw1AblYwITps2DVWqVMHXX3/9zrqpqamYMWMGHB0doa2tjapVq2LcuHHScHsOBwcHeHp64tChQ2jcuDH09PRQt25dHDp0CMCb4c+6devCwMAAH330ESIiIvLd3s2bN9GhQwcYGBjA3Nwc48ePx+vXrxXqyGQyjB8/Hr/++ivq1q0LHR0dbN68GQBw9+5dDBo0CBYWFtDR0UHdunXx888/F2m/JCUlYdSoUahSpQoMDQ3h4eGBv//+O9+6JdlOdnY2/P390ahRI+jp6aFy5cpo2bIlDhw4UOjr8js0HBMTgzFjxqBatWrQ1taGo6Mj5syZg8zMTKlOzrD9kiVLsGzZMjg6OsLQ0BCurq4ICwuT6vn6+kptkMlk0qOwkYigoCD07NkT1apVg66uLmrWrIkxY8bgv//+k+r8+eefkMlk2LlzZ57Xb9myBTKZDOHh4QCAiIgIDBgwAA4ODtDT04ODgwMGDhyY51SGnOH006dP4/PPP4eZmRmqVKmCPn364OnTp1I9BwcH3Lx5EyEhIVJ7HBwcCt3PRXl/srOzsXjxYtSpUwc6OjqwsLDA0KFD8fjxY4V15RwiDA0NRatWraQ2bdy4EQBw+PBhNGnSBPr6+nB2dkZgYKDC63MOTV67dg2ffvop5HI5TE1NMXnyZGRmZuLOnTvw8PCAkZERHBwcsHjxYoXXp6amYsqUKWjUqJH0WldXV/zxxx952p3zudq6dSvq1q0LfX19NGzYUPoMv+3w4cNo1KgRdHR04OjoiCVLluS7L4UQ+OWXX6R9aWJigk8++QT//vtvoe8B8KY/5vde5Xe4VpnY3xYcHCyNUA
4bNkzqIzmfs4IODe/YsQOurq4wNDSEoaEhGjVqhPXr1xe6rX379kFfXx8jR46UPp8RERHo0aMHTE1Noauri8aNG2P37t3SazZt2oRPP/0UANCuXTspvsJGuHNivnLlCvr06QNjY2PI5XIMGTIEz58/V6hb1H7s4OCQ76HO3Kew5BzW3bp1K6ZMmYKqVatCR0cH//zzT6H7pig2bNiAhg0bQldXF6ampujduzdu376tEMuQIUMAAC1atIBMJpP6UM4RMEtLS4X3N+d3a+/evWjcuDF0dXWlIyJF+W4tqpz9smPHDnz99dewtraGoaEhvLy88OzZM7x8+RKjR4+GmZkZzMzMMGzYMLx69Up6fWGHXnP/Ljx//hyjR4+Gra0tdHR0YG5ujtatW+cZBT1x4gQ6dOgAY2Nj6Ovro3Xr1jh58qRCnX/++QfDhg1DrVq1oK+vj6pVq8LLywvXr18vUrtL8juZkJCAESNGwNTUFIaGhujevTv+/fffPO3N6e+XL1/GJ598AhMTE+nUk6LmLwWddpW73+f87gQFBWHYsGEwNTWFgYEBvLy88v1OK8o+LhZRAhs3bhQARHh4uFi5cqUAIE6ePCktd3NzE/Xr15eeZ2dniy5dughNTU0xa9Yscfz4cbFkyRJhYGAgGjduLFJTU6W69vb2olq1aqJBgwZi586d4siRI6JFixZCS0tLfPfdd6J169Zi7969Yt++faJ27drC0tJSvH79Wnq9j4+P0NbWFnZ2dmLevHni+PHjws/PT2hqagpPT0+FdgAQVatWFS4uLmLHjh3i1KlT4saNG+LmzZtCLpcLZ2dnsWXLFnH8+HExZcoUUalSJeHn51fovsnOzhbt2rUTOjo60vZnz54tqlevLgCI2bNnS3VLsh0hhPD29hYymUyMHDlS/PHHH+Lo0aNi3rx5YuXKlQrvhZubW552vx1HdHS0sLW1Ffb29mLNmjXixIkT4ocffhA6OjrC19dXqhcVFSUACAcHB+Hh4SH2798v9u/fL5ydnYWJiYlISEgQQgjxzz//iE8++UQAEKGhodLj7fc5t9WrV4sFCxaIAwcOiJCQELF582bRsGFD4eTkJNLT06V6jRs3Fq1bt87z+ubNm4vmzZtLz/fs2SO+++47sW/fPhESEiICAgKEm5ubMDc3F8+fP5fq5fTl6tWriwkTJohjx46J3377TZiYmIh27dpJ9S5fviyqV68uGjduLLXn8uXLhbw7RXt/Ro8eLQCI8ePHi8DAQPHrr78Kc3NzYWtrqxCnm5ubqFKlinBychLr168Xx44dE56engKAmDNnjnB2dpY+Ly1bthQ6OjriyZMn0utnz54tAAgnJyfxww8/iKCgIDFt2jRp23Xq1BE//fSTCAoKEsOGDRMAxO+//y69PiEhQfj6+oqtW7eKU6dOicDAQDF16lRRqVIlsXnzZoV25/SRjz76SOzevVscOXJEuLu7C01NTXHv3j2p3okTJ4SGhob4+OOPxd69e8WePXtE8+bNhZ2dncj9FTVq1CihpaUlpkyZIgIDA8WOHTtEnTp1hKWlpYiJiSn0ffDx8RH29vZ5ynP2SXFizy0xMVHqS99++63URx49elTgtmbNmiUAiD59+og9e/aI48ePi2XLlolZs2ZJdXJ/ly5btkxoaGiIH374QSo7deqU0NbWFm3atBG7du0SgYGBwtfXVwAQGzduFEIIERsbK+bPny8AiJ9//lmKLzY2tsA25cRsb28vvvrqK3Hs2DGxbNky6Xv77c9lUfuxvb298PHxybOt3N9Tp0+flr6fP/nkE3HgwAFx6NAh8eLFiwLjzf29lp+cfTBw4EBx+PBhsWXLFlG9enUhl8vF33//LYR487387bffSvsvNDRU/PPPP+Ly5ctixIgRAoAIDAxUeH/t7e2FtbW1qF69utiwYYM4ffq0uHjxYpG/W4sqZ7/Y29sLX19faV8bGhqKdu3aiU6dOompU6eK48ePi0WLFgkNDQ0xYc
IE6fU53+E5/aKw/delSxdhbm4u1q5dK4KDg8X+/fvFd999JwICAqQ6W7duFTKZTPTq1Uvs3btXHDx4UHh6egoNDQ1
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# graphique en nombre de client ayant commandé\n",
"purchases_graph = nb_purchases_graph_2\n",
"\n",
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,4,1)]\n",
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==False]\n",
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==True]\n",
"\n",
"\n",
"# Création du barplot\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_0[\"nb_new_customer\"], width=12, label = \"Nouveau client\")\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_1[\"nb_new_customer\"], \n",
" bottom = purchases_graph_used_0[\"nb_new_customer\"], width=12, label = \"Ancien client\")\n",
"\n",
"\n",
"# commande pr afficher slt\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Mois')\n",
"plt.ylabel(\"Nombre de client ayant commandé\")\n",
"plt.title(\"Nombre de client ayant commandé un ticket pour l'offre 'muséale groupe'\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "82895dfc-e5ca-4be0-af24-93c1be8f6248",
"metadata": {},
"source": [
"### Proportion de tickets de prix 0"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "6e27dd83-f188-43a5-b595-618b4922a358",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ticket_id 0.418220\n",
"customer_id 0.418220\n",
"purchase_id 0.418220\n",
"event_type_id 0.418220\n",
"supplier_name 0.418220\n",
"purchase_date 0.418220\n",
"type_of_ticket_name 0.418220\n",
"amount 0.418220\n",
"children 0.418220\n",
"is_full_price 0.418220\n",
"name_event_types 0.418220\n",
"name_facilities 0.418220\n",
"name_categories 0.402548\n",
"name_events 0.175585\n",
"name_seasons 0.418220\n",
"dtype: float64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases[purchases['amount'] == 0].count()/len(purchases)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "f663d68b-8a5c-4804-b31a-4477a03ca1e4",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>purchase_id</th>\n",
" <th>ticket_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>73518.000000</td>\n",
" <td>7.351800e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>10.096167</td>\n",
" <td>2.484660e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2367.702603</td>\n",
" <td>4.636993e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.000000</td>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000000</td>\n",
" <td>3.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>641981.000000</td>\n",
" <td>1.256574e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" purchase_id ticket_id\n",
"count 73518.000000 7.351800e+04\n",
"mean 10.096167 2.484660e+01\n",
"std 2367.702603 4.636993e+03\n",
"min 1.000000 1.000000e+00\n",
"25% 1.000000 1.000000e+00\n",
"50% 1.000000 2.000000e+00\n",
"75% 1.000000 3.000000e+00\n",
"max 641981.000000 1.256574e+06"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases.groupby('customer_id')[['purchase_id', 'ticket_id']].nunique().describe()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d1212b10-3933-450a-b001-9e2cbf308f79",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>type_of_ticket_name</th>\n",
" <th>amount</th>\n",
" <th>children</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>8.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826667</th>\n",
" <td>20662815</td>\n",
" <td>1256135</td>\n",
" <td>8007697</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 17:23:54+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826668</th>\n",
" <td>20662816</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826669</th>\n",
" <td>20662817</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826670</th>\n",
" <td>20662818</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826671</th>\n",
" <td>20662819</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1826672 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 13070859 48187 5107462 4 vente en ligne \n",
"1 13070860 48187 5107462 4 vente en ligne \n",
"2 13070861 48187 5107462 4 vente en ligne \n",
"3 13070862 48187 5107462 4 vente en ligne \n",
"4 13070863 48187 5107462 4 vente en ligne \n",
"... ... ... ... ... ... \n",
"1826667 20662815 1256135 8007697 5 vente en ligne \n",
"1826668 20662816 1256136 8007698 5 vente en ligne \n",
"1826669 20662817 1256136 8007698 5 vente en ligne \n",
"1826670 20662818 1256137 8007699 5 vente en ligne \n",
"1826671 20662819 1256137 8007699 5 vente en ligne \n",
"\n",
" purchase_date type_of_ticket_name amount \\\n",
"0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
"1 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"2 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"3 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"4 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"... ... ... ... \n",
"1826667 2023-11-08 17:23:54+00:00 Atelier 11.0 \n",
"1826668 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826669 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826670 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"1826671 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"\n",
" children is_full_price name_event_types name_facilities \\\n",
"0 pricing_formula False spectacle vivant mucem \n",
"1 pricing_formula False spectacle vivant mucem \n",
"2 pricing_formula False spectacle vivant mucem \n",
"3 pricing_formula False spectacle vivant mucem \n",
"4 pricing_formula False spectacle vivant mucem \n",
"... ... ... ... ... \n",
"1826667 pricing_formula False offre muséale groupe mucem \n",
"1826668 pricing_formula False offre muséale groupe mucem \n",
"1826669 pricing_formula False offre muséale groupe mucem \n",
"1826670 pricing_formula False offre muséale groupe mucem \n",
"1826671 pricing_formula False offre muséale groupe mucem \n",
"\n",
" name_categories name_events name_seasons \n",
"0 indiv prog enfant l'école des magiciens 2018 \n",
"1 indiv prog enfant l'école des magiciens 2018 \n",
"2 indiv prog enfant l'école des magiciens 2018 \n",
"3 indiv prog enfant l'école des magiciens 2018 \n",
"4 indiv prog enfant l'école des magiciens 2018 \n",
"... ... ... ... \n",
"1826667 indiv entrées tp NaN 2023 \n",
"1826668 indiv entrées tp NaN 2023 \n",
"1826669 indiv entrées tp NaN 2023 \n",
"1826670 indiv entrées tp NaN 2023 \n",
"1826671 indiv entrées tp NaN 2023 \n",
"\n",
"[1826672 rows x 15 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases"
]
},
{
"cell_type": "markdown",
"id": "b8a90eaa-c383-4f73-9fd6-6fbbe8eeefb8",
"metadata": {},
"source": [
"# 2 - Comportement d'achat bis (Alexis)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "dc45c1cd-2a78-48a6-aa2b-6a501254b6f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(156289, 40)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>birthdate</th>\n",
" <th>street_id</th>\n",
" <th>is_partner</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>...</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>name_event_types</th>\n",
" <th>avg_amount</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
" <td>51.0</td>\n",
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
" <td>5.0</td>\n",
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
" <td>2988.0</td>\n",
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
" <td>9.0</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>4.452618</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1705.261192</td>\n",
" <td>1456.333715</td>\n",
" <td>248.927477</td>\n",
" <td>0.0</td>\n",
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id birthdate street_id is_partner gender is_email_true \\\n",
"0 1 NaN 2 False 2 True \n",
"1 1 NaN 2 False 2 True \n",
"2 1 NaN 2 False 2 True \n",
"3 1 NaN 2 False 2 True \n",
"4 2 NaN 2 False 1 True \n",
"\n",
" opt_in structure_id profession language ... vente_internet_max \\\n",
"0 False NaN NaN NaN ... 1.0 \n",
"1 False NaN NaN NaN ... 1.0 \n",
"2 False NaN NaN NaN ... 1.0 \n",
"3 False NaN NaN NaN ... 1.0 \n",
"4 True NaN NaN NaN ... 0.0 \n",
"\n",
" purchase_date_min purchase_date_max time_between_purchase \\\n",
"0 3262.190868 4.179306 3258.011562 \n",
"1 2502.715509 1408.715532 1093.999977 \n",
"2 3698.198229 5.221840 3692.976389 \n",
"3 3803.369792 0.146331 3803.223461 \n",
"4 1705.261192 1456.333715 248.927477 \n",
"\n",
" nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n",
"0 51.0 offre muséale individuel 6.150659 NaN \n",
"1 5.0 formule adhésion 6.439463 NaN \n",
"2 2988.0 spectacle vivant 7.762474 NaN \n",
"3 9.0 offre muséale groupe 4.452618 NaN \n",
"4 0.0 formule adhésion 6.439463 4.0 \n",
"\n",
" nb_campaigns_opened time_to_open \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
"[5 rows x 40 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Chargement des données temporaires\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Temp/Company 1 - customer_event.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customer = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(customer.shape)\n",
"customer.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "89fcb455-efb4-4ad4-ab88-efd6c8a76287",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
" 'event_type_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
" 'name_event_types', 'avg_amount', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open'],\n",
" dtype='object')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer.columns"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "d7b2356a-d5fc-4547-b3ff-fded0e304fb6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>purchase_count</th>\n",
" <th>total_price</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>7.030122</td>\n",
" <td>-67.790969</td>\n",
" <td>13.751530</td>\n",
" <td>1.956087</td>\n",
" <td>641472</td>\n",
" <td>8821221.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>307</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3</td>\n",
" <td>18.333333</td>\n",
" <td>30.666667</td>\n",
" <td>36.666667</td>\n",
" <td>2.000000</td>\n",
" <td>3</td>\n",
" <td>110.0</td>\n",
" <td>222.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>4</td>\n",
" <td>10.250000</td>\n",
" <td>5.000000</td>\n",
" <td>20.500000</td>\n",
" <td>2.000000</td>\n",
" <td>2</td>\n",
" <td>41.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>5</td>\n",
" <td>9.500000</td>\n",
" <td>0.000000</td>\n",
" <td>19.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1</td>\n",
" <td>19.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id average_price average_purchase_delay average_price_basket \\\n",
"0 1 7.030122 -67.790969 13.751530 \n",
"4 2 0.000000 0.000000 0.000000 \n",
"6 3 18.333333 30.666667 36.666667 \n",
"7 4 10.250000 5.000000 20.500000 \n",
"9 5 9.500000 0.000000 19.000000 \n",
"\n",
" average_ticket_basket purchase_count total_price nb_campaigns \\\n",
"0 1.956087 641472 8821221.5 0.0 \n",
"4 1.000000 307 0.0 4.0 \n",
"6 2.000000 3 110.0 222.0 \n",
"7 2.000000 2 41.0 7.0 \n",
"9 2.000000 1 19.0 4.0 \n",
"\n",
" nb_campaigns_opened \n",
"0 0.0 \n",
"4 0.0 \n",
"6 124.0 \n",
"7 7.0 \n",
"9 0.0 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"achat = ['customer_id', 'average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'purchase_count', 'total_price', 'nb_campaigns',\n",
" 'nb_campaigns_opened']\n",
"\n",
"customer_achat = customer[achat].drop_duplicates(subset = ['customer_id'])\n",
"customer_achat['nb_campaigns'] = customer_achat['nb_campaigns'].fillna(0)\n",
"customer_achat['nb_campaigns_opened'] = customer_achat['nb_campaigns_opened'].fillna(0)\n",
"customer_achat = customer_achat.fillna(0)\n",
"customer_achat.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "5559748f-1745-4651-a9f6-94702c7ee66f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>purchase_count</th>\n",
" <th>total_price</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>5.252070</td>\n",
" <td>-206.581486</td>\n",
" <td>11.451596</td>\n",
" <td>1.723372</td>\n",
" <td>0.655148</td>\n",
" <td>16.994064</td>\n",
" <td>40.923241</td>\n",
" <td>7.870681</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>7.915955</td>\n",
" <td>2996.743657</td>\n",
" <td>48.271194</td>\n",
" <td>7.045950</td>\n",
" <td>5.694038</td>\n",
" <td>313.099102</td>\n",
" <td>70.445724</td>\n",
" <td>23.119061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>-44863.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>5.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>11.000000</td>\n",
" <td>0.000000</td>\n",
" <td>19.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>20.000000</td>\n",
" <td>32.000000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>320.000000</td>\n",
" <td>1914.000000</td>\n",
" <td>9900.000000</td>\n",
" <td>900.000000</td>\n",
" <td>1508.000000</td>\n",
" <td>64350.000000</td>\n",
" <td>439.000000</td>\n",
" <td>434.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" average_price average_purchase_delay average_price_basket \\\n",
"count 151865.000000 151865.000000 151865.000000 \n",
"mean 5.252070 -206.581486 11.451596 \n",
"std 7.915955 2996.743657 48.271194 \n",
"min 0.000000 -44863.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
"75% 11.000000 0.000000 19.000000 \n",
"max 320.000000 1914.000000 9900.000000 \n",
"\n",
" average_ticket_basket purchase_count total_price nb_campaigns \\\n",
"count 151865.000000 151865.000000 151865.000000 151865.000000 \n",
"mean 1.723372 0.655148 16.994064 40.923241 \n",
"std 7.045950 5.694038 313.099102 70.445724 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 2.000000 \n",
"50% 0.000000 0.000000 0.000000 5.000000 \n",
"75% 2.000000 1.000000 20.000000 32.000000 \n",
"max 900.000000 1508.000000 64350.000000 439.000000 \n",
"\n",
" nb_campaigns_opened \n",
"count 151865.000000 \n",
"mean 7.870681 \n",
"std 23.119061 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 1.000000 \n",
"75% 3.000000 \n",
"max 434.000000 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer_wto_outlier = customer_achat[customer_achat['customer_id']!=1]\n",
"\n",
"customer_wto_outlier[['average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'purchase_count', 'total_price', 'nb_campaigns', 'nb_campaigns_opened']].describe()"
]
},
{
"cell_type": "markdown",
"id": "b49c9e93-f324-42ee-a262-34ffb44a2261",
"metadata": {},
"source": [
"# Event"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "4971e35d-a762-4e18-9443-fd9571bd3f1e",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAJgCAYAAACXyFewAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpklEQVR4nOzdeXxM1/8/8NdkXyQjkZ2IRAiRxFqRqJ2EirVKG9LYtUpqSa211lZ76aeWqtrXompLY6/YhSRUKBUEiSAbQURyfn/45X6NCZ1h4o54PR+PeTzMuWfuvGZJvHPuuecqhBACRERERPRKBnIHICIiInoXsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiLSUwkJCbCwsMD8+fPfaB9KpfKN9kHPsGgqJsuWLYNCoYCZmRmuXbumtr1x48bw8fGRIRnQvXt3lCpVSpbn/i8KhQLjx4+XOwa9hp07d/Kze8GBAwegUChw4MABuaO8886fP4/x48fj6tWrckd5bY0bN0bjxo2l+1evXn3l9+P+/fvo1KkTBg4ciIEDB7728/r5+WHLli0YNWoUjh8//tr7KQmOHDmC8ePHIzMz87Uez6KpmOXm5uLbb7+VOwZRsdu5cycmTJggdwy9UqtWLRw9ehS1atWSO8o77/z585gwYcI7XTRpq1evXqhbty6mTZv2xvtq2rQpFi1ahC5duuDevXs6SPduOnLkCCZMmMCiSV+1bNkSa9asQXx8vNxRdEIIgUePHskdg0hNXl4enj59KncMFdbW1qhXrx6sra3ljvJOePjwodwR9MqGDRuwatUqKBQKnewvNDQUV69eRZkyZXSyv/cRi6ZiNmzYMJQpUwbDhw//z76PHz/GyJEj4e7uDhMTE5QtWxZfffWVWkVcoUIFhISEYPv27ahZsybMzc1RtWpVbN++HcCzQ4NVq1aFpaUl6tati1OnThX5fH///TeaNWsGS0tL2NvbY8CAAWq/tBQKBQYMGICFCxeiatWqMDU1xfLlywEAly5dQmhoKBwcHGBqaoqqVavif//7n0bvS3Z2Nvr06YMyZcqgVKlSaNmyJf75558i+77J8xQUFGD+/PmoUaMGzM3NUbp0adSrVw9//PGHSp/p06ejSpUqMDU1hYODAz7//HPcuHFDZV+Fh1RPnjyJBg0awMLCAh4eHpg2bRoKCgpU9jdp0iR4eXlJz+nn54cffvhBZX8xMTFo1qwZrKysYGFhgcDAQOzYsUOlT+Fh3n379knvl7W1NT7//HPk5OQgNTUVnTt3RunSpeHs7IzIyEjk5eVJjy8c/p8xYwa+//57VKhQAebm5mjcuDH++ecf5OXlYcSIEXBxcYFSqUSHDh2QlpamkmH9+vUICgqCs7Oz9F0bMWIEcnJypD7du3eXPhOFQiHdCkcFhBD46aefpM/BxsYGnTp1wpUrV1Seq0KFCujevbva5/jiYY3Cw14rV67E0KFDUbZsWZiamuLy5ct4+PAhIiMj4e7uDjMzM9ja2qJOnTpYu3at2n6fN378+CL/cyr8DJ4f4Sj8GYyKikKtWrVgbm6OKlWqYOnSpSqPfdnhuWXLlsHLy0v6Pq9YsQLdu3dHhQoV/vOxhZ/psmXLVNpPnTqFtm3bwtbWFmZmZqhZsyY2bNig0ud135vC92D37t3o0aMHbG1tYWlpiTZt2qh9hrt370a7du1Qrlw5mJmZwdPTE/369cPdu3dV+hW+36dPn0anTp1gY2ODihUrvvT5P/nkEwBAkyZNpO/XsmXL8N1338HIyAjJyclqj+vZsyfKlCmDx48fA/i/z23Lli3w8/ODmZkZPDw8MG/ePLXHZmdnS+9V4e/jQYMGqXzvX0YIgenTp8PNzQ1mZmaoVasWdu3a9Z+PK/Rfn2V8fDwUCgV++eUXtcfu2rULCoVC5Xfc879DTUxMUKVKFb
XXXPh9W7t2LUaPHg0XFxdYW1ujefPmuHjxotrz7NmzB82aNYO1tTUsLCxQv3597N27V6VP4WeckJCATz75BEqlEra2thgyZAiePn2KixcvomXLlrCyskKFChUwffp0tefR9HMo/L9q5cqVqFq1KiwsLFC9enXp/8XCPN988w0AwN3dXfoeaXX4XFCx+PXXXwUAcfLkSfHDDz8IAGLv3r3S9kaNGolq1apJ9wsKCkRwcLAwMjISY8aMEdHR0WLmzJnC0tJS1KxZUzx+/Fjq6+bmJsqVKyd8fHzE2rVrxc6dO4W/v78wNjYWY8eOFfXr1xebN28WW7ZsEZUrVxaOjo7i4cOH0uPDw8OFiYmJKF++vJg8ebKIjo4W48ePF0ZGRiIkJETldQAQZcuWFX5+fmLNmjVi37594ty5c+Lvv/8WSqVS+Pr6ihUrVojo6GgxdOhQYWBgIMaPH//K96agoEA0adJEmJqaSs8/btw44eHhIQCIcePGSX3f5HmEECIsLEwoFArRu3dvsXXrVrFr1y4xefJk8cMPP0h9+vbtKwCIAQMGiKioKLFw4UJhb28vXF1dxZ07d1Q+szJlyohKlSqJhQsXit27d4v+/fsLAGL58uVSv6lTpwpDQ0Mxbtw4sXfvXhEVFSXmzp2rkvfAgQPC2NhY1K5dW6xfv178/vvvIigoSCgUCrFu3TqpX+H3yN3dXQwdOlRER0eL77//XhgaGorPPvtM1KpVS0yaNEns3r1bDB8+XAAQs2bNkh6flJQkAAg3NzfRpk0bsX37drFq1Srh6OgoKleuLMLCwkTPnj3Frl27xMKFC0WpUqVEmzZtVN7D7777TsyZM0fs2LFDHDhwQCxcuFC4u7uLJk2aSH0uX74sOnXqJACIo0ePSrfC722fPn2EsbGxGDp0qIiKihJr1qwRVapUEY6OjiI1NVXaj5ubmwgPD1f7HBs1aiQaNWok3d+/f7/03ezUqZP4448/xPbt28W9e/dEv379hIWFhZg9e7bYv3+/2L59u5g2bZqYP3/+K78r48aNE0X9Siz8DJKSklRylitXTnh7e4sVK1aIP//8U3zyyScCgDh48KBazv3796vtr127dmLbtm1i1apVwtPTU7i6ugo3N7dXPlaI//tMf/31V6lt3759wsTERDRo0ECsX79eREVFie7du6v1e933pjCzq6ur9H1ZvHixcHBwEK6uriIjI0Pqu2DBAjF16lTxxx9/iIMHD4rly5eL6tWrCy8vL/HkyRO199vNzU0MHz5c7N69W/z+++9FPn9aWpqYMmWKACD+97//Sd+vtLQ0cfv2bWFqaipGjx6t8ph79+4Jc3Nz8c0330htbm5uomzZsqJ8+fJi6dKlYufOnaJr164CgJgxY4bULycnR9SoUUPY2dmJ2bNniz179ogffvhBKJVK0bRpU1FQUPDK96vwtfXq1Ut6r8qWLSucnJxUvsdF0fSzrFmzpqhfv77a4zt37iwcHBxEXl6eEKLo36GDBw8WCoVCjBkzRnpc4fetQoUKomvXrmLHjh1i7dq1onz58qJSpUri6dOnUt+VK1cKhUIh2rdvLzZv3iy2bdsmQkJChKGhodizZ4/a++Dl5SW+++47sXv3bjFs2DDp922VKlXEvHnzxO7du0WPHj0EALFp06bX+hwKs9etW1ds2LBB7Ny5UzRu3FgYGRmJf//9VwghRHJyshg4cKAAIDZv3ix9j7Kysl75mTyPRVMxeb5oys3NFR4eHqJOnTrSh/xi0RQVFSUAiOnTp6vsZ/369QKAWLx4sdTm5uYmzM3NxY0bN6S2uLg4AUA4OzuLnJwcqf33338XAMQff/whtYWHhwsAKoWDEEJMnjxZABAxMTFSGwChVCpFenq6St/g4GBRrlw5tS/bgAEDhJmZmVr/5+3ateuVz/980fQmz/PXX38JAGq/TJ+XmJgoAIj+/furtB8/flwAEKNGjZLaGjVqJACI48ePq/T19vYWwcHB0v2QkB
BRo0aNlz6nEELUq1dPODg4iPv370ttT58+FT4+PqJcuXLS96TwezRw4ECVx7dv314AELNnz1Zpr1GjhqhVq5Z0v/A
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre de consommateurs uniques par type d'évènement\n",
"\n",
"event_counts = customer.groupby('name_event_types')['customer_id'].nunique()\n",
"\n",
"event_counts.plot(kind='bar')\n",
"plt.xlabel(\"Type d'évènement\")\n",
"plt.ylabel('Nombre de consommateurs uniques')\n",
"plt.title(\"Nombre de consommateurs uniques par type d'évènement\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "bc65a711-d172-4839-b487-3047280fc3a6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACqtUlEQVR4nOzdd1xT1/8/8FdYYQgRRUAcuHGAWxFtRavgwj1asQhq0bqQonXUuhVn1dbd1r1w103BhaKiOFCxzn5cKIgiWzbn90d/3K8RB4lgGK/n48FDc+47yTs3J5c3J+eeKxNCCBARERERkUq0NJ0AEREREVFRxEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiAqVpKQk1K5dG3379kV2dram06FPlB/vZ0JCAho2bIg+ffrkc3ZEn4aFNBV6GzZsgEwmg76+Ph49epRre5s2bWBra6uBzAAPDw+UKlVKI889ffp0yGSyj/60adPmo4917tw5TJ8+HXFxcZ+cj7o8PDxQpUoVte//tiNHjmD69Onv3FalShV4eHio9HgPHz6ETCbDokWLPj05ANu2bcPSpUvz5bEKQkF8rnx9ffHXX399NM7T0xMWFhbYsmULtLSK/q8pDw+PPH1WVe2TmvCuz3mVKlXe+1kD8uf9NDExwZEjRxAaGoolS5ao9RjFxbNnzzB9+nSEhYVpOhUCoKPpBIjyKi0tDT///DM2b96s6VQKhe+++w4dO3aUbkdGRqJXr14YPXo0XF1dpXYTE5OPPta5c+cwY8YMeHh4oHTp0gWR7md35MgRrFix4p2/4Pft25en/VKQtm3bhvDwcHh7e2s0j8/J19cXffr0QY8ePd4bs2LFCly/fh1nz56FXC7/fMkVoClTpuD777+Xbl+5cgUjR46Er68v2rZtK7WXK1dOE+kVqPx8P62srHD06FG0bdsW9vb2aNmyZT5lWbQ8e/YMM2bMQJUqVdCwYUNNp1PisZCmIqNjx47Ytm0bxo0bhwYNGmg6nU8mhEBqaioMDAzUun/FihVRsWJF6fbDhw8BAJUrV0aLFi3yI8Viq1GjRppOgd5j5MiRGDlypKbTUEtGRgZkMhl0dJR/tVavXh3Vq1eXbqempgIAatasWew/q/n9ftatWxfPnz/Pt8cj+lRF/zszKjHGjx+PsmXLYsKECR+NTU1NxaRJk1C1alXo6emhQoUKGDlyZK6pC1WqVIGLiwsOHTqERo0awcDAAHXq1MGhQ4cA/DetpE6dOjAyMkLz5s1x6dKldz7fzZs30a5dOxgZGaFcuXIYNWoUXr9+rRQjk8kwatQorF69GnXq1IFcLsfGjRsBAPfu3YOrqyvMzc0hl8tRp04drFixQo29lNuBAwfg4OAAQ0NDGBsbw8nJCefPn5e2T58+HT/++CMAoGrVqtLXzKdOnQIA7NixA87Ozihfvry0fyZOnIjk5GS1c9qwYQNsbGyk17pp06Z3xqWnp2P27NmoXbs25HI5ypUrh0GDBuHFixcffHwPDw9p/7351XnOHxvvmtoRFxeHsWPHolq1apDL5TA3N0fnzp1x+/bt9z5PRkYG3N3dUapUKanPCCGwcuVKNGzYEAYGBjA1NUWfPn3wv//9T7pfmzZtcPjwYTx69EgpvxyrVq1CgwYNUKpUKRgbG6N27dr46aefPviaAWDGjBmwt7dHmTJlYGJigsaNG2Pt2rUQQuSK3bZtGxwcHFCqVCmUKlUKDRs2xNq1a3PFhYaG4ssvv4ShoSGqVauGefPm5ZrnmpCQgHHjxil93ry9vZX6iEwmQ3JyMjZu3PjOaUdRUVEYNmwYKlasCD09PVStWhUzZsxAZmam0nOps29ypuUsWLAAc+bMQeXKlaGvr4+mTZvi+PHjSrH379/HoEGDULNmTRgaGqJChQro2rUrbty4oRR36t
QpyGQybN68GWPHjkWFChUgl8tx//79D+byLmfOnIFMJsP27dtzbdu0aRNkMhlCQ0MB/N90srwcc/LSFz/k8OHDaNiwIeRyOapWrarStKaPvZ8ZGRkwNzeHm5tbrvvGxcXBwMAAPj4+UtvbfczKygpeXl5ISkpSum/OcXbz5s2oU6cODA0N0aBBA+nz+aa8HHdz3udt27ZhwoQJKF++PEqVKoWuXbvi+fPnSExMxNChQ2FmZgYzMzMMGjQoV055fR9yplN96DN36tQpNGvWDAAwaNAg6bOU883b//73P3zzzTewsrKCXC6HhYUF2rVrx2kgBUkQFXLr168XAERoaKj49ddfBQBx/Phxabujo6OoV6+edDs7O1t06NBB6OjoiClTpoiAgACxaNEiYWRkJBo1aiRSU1OlWGtra1GxYkVha2srtm/fLo4cOSLs7e2Frq6umDp1qmjVqpXYu3ev2Ldvn6hVq5awsLAQr1+/lu7v7u4u9PT0ROXKlcWcOXNEQECAmD59utDR0REuLi5KrwOAqFChgqhfv77Ytm2bOHHihAgPDxc3b94UCoVC2NnZiU2bNomAgAAxduxYoaWlJaZPn57n/fTgwQMBQCxcuFBq27p1qwAgnJ2dxV9//SV27NghmjRpIvT09MSZM2eEEEI8efJEjB49WgAQe/fuFefPnxfnz58X8fHxQgghZs2aJZYsWSIOHz4sTp06JVavXi2qVq0q2rZtq/T806ZNE3k5pOS8n927dxcHDx4UW7ZsETVq1BCVKlUS1tbWUlxWVpbo2LGjMDIyEjNmzBCBgYHizz//FBUqVBB169ZVeh/edv/+fdGnTx8BQHo958+fl957a2tr4e7uLsUnJCSIevXqCSMjIzFz5kzx999/iz179ogxY8aIEydOvHP/xsbGirZt2wpLS0tx6dIl6bE8PT2Frq6uGDt2rPD39xfbtm0TtWvXFhYWFiIqKkoIIcTNmzdFq1athKWlpVJ+Qgixfft2AUCMHj1aBAQEiGPHjonVq1cLLy+vj+5bDw8PsXbtWhEYGCgCAwPFrFmzhIGBgZgxY4ZS3JQpUwQA0atXL7Fr1y4REBAgFi9eLKZMmSLFODo6irJly4qaNWuK1atXi8DAQDFixAgBQGzcuFGKS05OFg0bNhRmZmZi8eLF4tixY+LXX38VCoVCfPXVVyI7O1sIIcT58+eFgYGB6Ny5s/R6b968KYQQIjIyUnr/16xZI44dOyZmzZol5HK58PDwkJ5L3X2T895VqlRJfPHFF2LPnj1i165dolmzZkJXV1ecO3dOig0KChJjx44Vu3fvFkFBQWLfvn2iR48ewsDAQNy+fVuKO3nypPSZ7tOnjzhw4IA4dOiQiImJ+ej7lHPfXbt2SW2NGjUSrVq1yhXbrFkz0axZM+m2KsecvPTF9zl27JjQ1tYWX3zxhdi7d6+0vypXrvzRz3le388ffvhBGBgYSMeaHCtXrhQAxPXr14UQ7+5jS5YsESYmJsLR0VFkZWVJ9wUgqlSpIpo3by527twpjhw5Itq0aSN0dHTEv//+K8Xl9bib815ZW1sLDw8P4e/vL1avXi1KlSol2rZtK5ycnMS4ceNEQECAmD9/vtDW1hajR49W633Iy2cuPj5eOob+/PPP0mfpyZMnQgghbGxsRI0aNcTmzZtFUFCQ2LNnjxg7dqw4efLkB98zUh8LaSr03iyk09LSRLVq1UTTpk2lX9BvF9L+/v4CgFiwYIHS4+zYsUMAEL///rvUZm1tLQwMDERERITUFhYWJgCI8uXLi+TkZKn9r7/+EgDEgQMHpDZ3d3cBQPz6669KzzVnzhwBQAQHB0ttAIRCoRCvXr1Siu3QoYOoWLFirl8mo0aNEvr6+rni3+ftQi8rK0tYWVkJOzs7pV80iYmJwtzcXLRs2VJqW7hwoQAgHjx48MHnyM7OFhkZGSIoKEgAENeuXZO25aWQzsmpcePG0vsnhBAPHz4Uurq6SoV0TtG0Z8
8epccIDQ0VAMTKlSs/+FwjR458bz5vF9IzZ84UAERgYOB7H+/N/fvgwQNRt25dUbduXfHw4UMp5vz58wKA+OWXX5T
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre Total de tickets achetés par Type d'évènements\n",
"\n",
"total_tickets_by_event = customer.groupby('name_event_types')['nb_tickets'].sum()\n",
"\n",
"total_tickets_by_event.plot(kind='bar', figsize=(8, 5))\n",
"plt.xlabel(\"Type d'évènements\")\n",
"plt.ylabel('Nombre Total de tickets achetés')\n",
"plt.title(\"Nombre Total de tickets achetés par Type d'évènements\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c95cc35c-abfc-47c7-9b8a-ac69bfd60dd8",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwsAAAJgCAYAAADF4v+XAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACNvklEQVR4nOzdd1hT1/8H8HfYIENQQERkSB0IuFAL1j1x1lE3DrBWrcXdOqq4tdY6W7TWgZPi3nuL4ldBBBfWgWIV3IiioJDz+8MfqTEJgkIu4Pv1PDwPOfcmeecmuckn59xzZUIIASIiIiIiovfoSB2AiIiIiIgKJhYLRERERESkFosFIiIiIiJSi8UCERERERGpxWKBiIiIiIjUYrFARERERERqsVggIiIiIiK1WCwQEREREZFaLBaIiIiIiEgtFgtERIXY3bt3YWNjgxEjRkgdhYgoz/To0QMVKlTAo0ePPvo2/P39Ua5cOSQnJ+ddsM9QroqFkJAQyGQyGBkZ4fbt2yrLGzRoAHd39zwLlxt9+vSBqampJPf9ITKZDBMnTszT20xPT8fvv/+Or776CpaWljAwMIC9vT06d+6MY8eO5el9FVYNGjRAgwYNPvl23rx5A1tbW3z55Zca15HL5Shbtiw8PT0/+f7UCQ4ORkhISL7c9oc4OTlBJpNp3JarVq2CTCaDTCbD0aNHtZpNSi9fvsTEiRPVPuasfeWtW7cUbX369IGTk5PSek5OTujTp89HZ8jIyEDXrl3RqFEjzJ49+6NvR9s+9XF/LO43tSu79wjlnJT7/7xw9OhRlc+HPn36ZPv5vHjxYhw6dAh79+5FyZIlP/q+lyxZgvLly0uyvylopk+fjq1bt37UdT+qZyE9PR0///zzR90hfbpHjx6hTp06GD58ONzd3RESEoJDhw7ht99+g66uLho3boyYmBipYxYZ+vr68PPzw//+9z9cvnxZ7ToHDx7EnTt3EBAQkC8ZpP6wMDMzw/Hjx3Hjxg2VZcuXL4e5ubkEqaT18uVLTJo0Se0XoVatWiEiIgJ2dnbZ3saWLVswfvz4j84wZswY6OnpKQo20oz7Te3L7j1COSf1/l/bzp07h/Hjx2P37t1wdnb+pNvS09PDhg0bcOfOHfz66695lLBw+pRiQe9jrtSiRQusW7cOI0eORJUqVT7qjgsSIQTS0tJgbGwsdZQc6dWrF2JiYrBv3z40atRIaVnXrl0xfPhwWFpaSpSuaAoICMBvv/2G5cuXq/0Fd/ny5TAwMEDPnj0lSJf/vvrqK1y4cAHLly/HtGnTFO03btzA8ePH0a9fP/z1118SJixYrK2tYW1t/cH1qlWr9kn3U1A//DIzM5GRkQFDQ0Opoyhwv/l5evnyJUxMTKSOka3C9h0kv1WvXh0PHz7Ms9szNTVFVFRUnt3e5+ijehZ+/PFHlChRAj/99NMH101LS8OYMWPg7Oys6PL9/vvvVcaPOTk5oXXr1ti5cyeqVasGY2NjVKpUCTt37gTwtlu/UqVKKFasGGrVqoXIyEi193fp0iU0btwYxYoVg7W1NQYPHoyXL18qrSOTyTB48GAsXrwYlSpVgqGhIVauXAkAuHbtGrp37w4bGxsYGhqiUqVK+OOPP3K0XVJSUvDtt9+iRIkSMDU1RYsWLfDPP/+oXfdj7ycqKgp79uxBQECAygdelpo1a6Js2bIAgIcPH2LQoEFwc3ODqakpbGxs0KhRI5w4cULpOrdu3YJMJsPs2bMxZ84cODs7w9TUFN7e3jh9+rTSupGRkejatSucnJxgbGwMJycndOvWTWVo2sSJE9X+2vn+EI3w8HDo6+tj5MiRatdbtmxZtttECIFZs2bB0dERRkZGqF69Ovbs2aN23ZSUFIwcOVLp9Th06FCkpqZmex+VKlWCt7c3Vq9ejYyMDKVlycnJ2LZtG9q1a4cSJUootlHbtm1hZWUFIyMjVKtWDevXr1f7+I4cOYKBAweiZMmSKF
GiBDp06IB79+4p1nNycsKlS5dw7NgxxXCfd4ez5PQxbdiwAbVr14aFhQVMTEzg4uICf3//bB93Fh0dHfTq1QsrV66EXC5XtC9fvhwODg5o0qSJ2utt374d3t7eMDExgZmZGZo2bYqIiAjF8hMnTkAmkyE0NFTlulm/lp89e1bRlpfbVRNNw9feHUZ069YtRTEwadIkxfOS1dWtbhiSOu8Px5HL5Zg6dSoqVKgAY2NjFC9eHJ6enpg/f77S9T5lP5W1//vzzz9Rvnx5GBoaws3NDX///bfSerndd8yaNQtTp06Fs7MzDA0NceTIkRzlyZKfr+Pc7jcB4OLFi2jXrh0sLS1hZGSEqlWrKj4nsmQNrwgNDcW4ceNQunRpmJubo0mTJrh69arSutHR0WjdurXiOStdujRatWqFf//9V7GOtj4vs4btxsXFoXnz5ihWrBjs7Owwc+ZMAMDp06fx1VdfoVixYihfvrzK487Ja+ND7xEgZ69jTe8ldUNbsoZCHz9+HD4+PjAxMVG8Ng4fPowGDRqgRIkSMDY2RtmyZdGxY0eV7wfvy9rWW7ZsgaenJ4yMjODi4oIFCxYorZeWloYRI0agatWqsLCwgJWVFby9vbFt2zaV28zuO4i6+1e3/3/x4gWKFy+O7777TuU6t27dgq6uruLHhKxteODAAfTt2xdWVlYoVqwY2rRpg5s3b6pc/+DBg2jcuDHMzc1hYmKCOnXq4NChQ9lupyxxcXFo0aIFTExMULJkSQwYMADPnz/P0XVfv36NqVOnomLFijA0NIS1tTX69u2rVDh8/fXXcHR0VPocylK7dm1Ur15dcVkIgeDgYFStWlWxP+3QoQOuX7+udL2s183Zs2dRt25dxX5l5syZKveT0/1U1nO8YsUKxf7cy8sLp0+fhhACv/76q+J7VqNGjVQyATl7HrK+Z126dAndunWDhYUFbG1t4e/vj2fPninlSU1NxcqVKxWvo1wN0xa5sGLFCgFAnD17VsyfP18AEIcOHVIsr1+/vqhcubLislwuF82bNxd6enpi/PjxYv/+/WL27NmiWLFiolq1aiItLU2xrqOjoyhTpoxwd3cXoaGhYvfu3aJ27dpCX19fTJgwQdSpU0ds3rxZbNmyRZQvX17Y2tqKly9fKq7fu3dvYWBgIMqWLSumTZsm9u/fLyZOnCj09PRE69atlR4HAGFvby88PT3FunXrxOHDh8XFixfFpUuXhIWFhfDw8BCrVq0S+/fvFyNGjBA6Ojpi4sSJ2W4buVwuGjZsKAwNDRX3HxQUJFxcXAQAERQUpFj3U+5n+vTpAoDYs2dPtutliYuLEwMHDhR///23OHr0qNi5c6cICAgQOjo64siRI4r14uPjBQDh5OQkWrRoIbZu3Sq2bt0qPDw8hKWlpUhOTlasu2HDBjFhwgSxZcsWcezYMfH333+L+vXrC2tra/Hw4UPFekFBQULdSyzrdRQfH69omzlzpgAgtm3bJoQQ4uLFi8LExET07Nnzg48x634CAgLEnj17xJIlS4S9vb0oVaqUqF+/vmK91NRUUbVqVVGyZEkxZ84ccfDgQTF//nxhYWEhGjVqJORyebb3s3TpUgFAbN26Van9jz/+EADE3r17hRBCHD58WBgYGIi6deuKsLAwsXfvXtGnTx8BQKxYsUJlO7i4uIgffvhB7Nu3TyxdulRYWlqKhg0bKtY7d+6ccHFxEdWqVRMREREiIiJCnDt3LleP6dSpU0Imk4muXbuK3bt3i8OHD4sVK1YIPz+/D25fR0dH0apVK3H9+nUhk8nE7t27hRBCZGRkCHt7ezFhwgSxYcMGAUDpNbV27VoBQDRr1kxs3bpVhIWFiRo1aggDAwNx4sQJxXrVqlUTderUUbnfmjVripo1ayou5/V21aR+/fpKr5ssvXv3Fo6OjkIIIdLS0sTevXsVr7us5+X69etKGd59jb97/Xe3be/evRWXZ8yYIXR1dUVQUJA4dOiQ2Lt3r5g3b5
7SfuFT9h9CvN3/OTg4CDc3NxEaGiq2b98uWrRoIQCIDRs2KNbL7b7D3t5eNGzYUGzcuFHs379f6bG/7/3Hnd+v44/
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre de Canaux de Ventes Moyen utilisé par les Consommateurs par type d'évènement\n",
"\n",
"avg_supp_event = customer.groupby('name_event_types')['nb_suppliers'].mean()\n",
"avg_supp_event.plot(kind='bar')\n",
"plt.xlabel(\"Type d'évènement\")\n",
"plt.ylabel('Nombre de Canaux de Ventes Moyen')\n",
"plt.title(\"Nombre de Canaux de Ventes Moyen utilisé par les Consommateurs par type d'évènement\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "49d5fd2d-9bc1-43ac-9270-1efd73759854",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyfElEQVR4nOzdd1hT1/8H8HcYYYnIEBAH4kJR3IpiKzhwUupuxSIutGpFqtbRqjhR0aqtVsVtXVj3LAWtoqi4927rFkSRIcoM5/eHP+7XCCqJQQK+X8/j0+bck+Sd5HLz4XDuuTIhhAAREREREalEp7ADEBEREREVRSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIqBlJQUVK9eHd27d0d2dnZhxyEqth4+fAhra2uMHDlS7cd48OABypcv/0GPQdqBhXQxtXr1ashkMhgaGuLu3bu5tru7u6NWrVqFkAzo06cPSpQoUSjPPWnSJMhksvf+c3d3f+9jHTt2DJMmTUJiYuIH51FXnz59ULFiRbXv/6Z9+/Zh0qRJeW6rWLEi+vTpo9Lj3blzBzKZDHPmzPnwcAA2bNiA+fPna+SxCkJB/FwFBQVhx44d7+3n5+cHGxsbrFu3Djo6Rf/Q/iHHiatXr2LSpEm4c+eOZkMVkJcvX2LSpEk4dOhQYUfJF3d393wdR992LNEmeR1DZTIZVq9enWf/rKwsfP3112jZsuUHHdfKlSuHP//8EytXrsS2bdvUfpzioKj9vL5Jr7ADUMFKT0/H+PHjsXbt2sKOohUGDBiAdu3aSbdjYmLQpUsXDBs2DN7e3lJ7yZIl3/tYx44dw+TJk9GnTx+UKlWqIOJ+dPv27cNvv/2W5xfg9u3b8/W+FKQNGzbg8uXLCAgIKNQcH1NQUBC6deuGTp06vbXPb7/9hosXL+Lo0aMwMDD4eOG01NWrVzF58mS4u7tr9BfNgvLy5UtMnjwZAPL1S3xhW7RoEZKTk6Xbe/fuxbRp07Bq1SpUr15dai9XrlxhxCtQ48aNg56eHn7//fcPGgQBgFq1amH79u3o0aMHateujSpVqmgoZdFS1H5e38RCuphr164dNmzYgFGjRqFOnTqFHeeDCSGQlpYGIyMjte5frlw5pYN7zm/AFSpUQJMmTTQRsdiqV69eYUegtxg6dCiGDh1a2DFUlpqaqvbPcmF4+fIljI2NCzvGR5OamgpDQ8NcBaOTk5PS7evXrwN4VRg2bNjwo+UrDLNnz9bo47m7uyMuLk6jj0kfV9H/+x+90+jRo2FpaYkxY8a8t29aWhrGjRsHBwcHyOVylC1bFkOHDs01daFixYrw9PTEnj17UK9ePRgZGaFGjRrYs2cPgFfTSmrUqAETExM0btwYp0+fzvP5rly5glatWsHExASlS5fGd999h5cvXyr1kclk+O6777BkyRLUqFEDBgYGWLNmDQDg1q1b8Pb2hrW1NQwMDFCjRg389ttvarxLue3atQtNmzaFsbExTE1N4eHhgePHj0vbJ02ahB9++AEA4ODgIP0pM+dPs5s2bUKbNm1QpkwZ6f0ZO3YsXrx4oXam1atXw9HRUXqtv//+e579MjIyMG3aNFSvXh0GBgYoXbo0+vbtiydPnrzz8fv06SO9f6//eTbnl428pnYkJiZi5MiRqFSpEgwMDGBtbY0OHTpIX6x5yczMhK+vL0qUKCHtM0IILFq0CHXr1oWRkRHMzc3RrVs3/Pfff9L93N3dsXfvXty9e1cpX47FixejTp06KFGiBExNTVG9enX8+OOP73zNADB58mS4uLjAwsICJUuWRP369bFixQoIIXL13bBhA5o2bYoSJUqgRIkSqFu3LlasWJGr36lTp/D555/D2NgYlSpVwsyZM3PNW05OTsaoUaOUft4CAgKU9hGZTIYXL15gzZo1eU47io
2NxaBBg1CuXDnI5XI4ODhg8uTJyMrKUnoudd+b993vbVOTcqaWvf6n2pzjxrZt21CvXj0YGhpKo7D5lfMYYWFhqF+/PoyMjFC9enWsXLlS6bm7d+8OAGjRooX0vr3+p/r9+/ejVatWKFmyJIyNjdGsWTMcOHBA6blyXtvZs2fRrVs3mJubo3LlyvnOkeN9n9GdO3dQunRpAK/2xZy875pGdejQIchkMqxbtw4jRoyAra0tjIyM4ObmhnPnzin1PX36NL7++mtUrFgRRkZGqFixInr27Jlryl/OZxYeHo5+/fqhdOnSMDY2Rnp6+ns+ldzWrl0LmUymdMzMMWXKFOjr6+PRo0cA/jcd6siRI2jSpAmMjIxQtmxZTJgwAQqFQum+6h7bXn+N+TmG5uV93zdPnjyBXC7HhAkTct33+vXrkMlk+PXXX6W2N/eLihUrIjAwUOln9/WpcXPnzoWDgwNKlCiBpk2bIjo6OtfznD59Gl5eXrCwsIChoSHq1auHP/74I9d7IJPJ8Pfff8PPzw+WlpYoWbIkevfujRcvXiA2NhY9evRAqVKlUKZMGYwaNQqZmZlKj5Hfz0ETP6/nzp2Dp6en9L7b2dmhY8eOePDgwfs+so9HULG0atUqAUCcOnVK/PLLLwKAOHDggLTdzc1N1KxZU7qdnZ0t2rZtK/T09MSECRNEeHi4mDNnjjAxMRH16tUTaWlpUl97e3tRrlw5UatWLbFx40axb98+4eLiIvT19cXEiRNFs2bNxLZt28T27dtFtWrVhI2NjXj58qV0f19fXyGXy0WFChXE9OnTRXh4uJg0aZLQ09MTnp6eSq8DgChbtqyoXbu22LBhg/j777/F5cuXxZUrV4SZmZlwdnYWv//+uwgPDxcjR44UOjo6YtKkSfl+n27fvi0AiNmzZ0tt69evFwBEmzZtxI4dO8SmTZtEgwYNhFwuF0eOHBFCCHH//n0xbNgwAUBs27ZNHD9+XBw/flwkJSUJIYSYOnWqmDdvnti7d684dOiQWLJkiXBwcBAtWrRQev7AwECRnx/DnM/zyy+/FLt37xbr1q0TVapUEeXLlxf29vZSP4VCIdq1aydMTEzE5MmTRUREhFi+fLkoW7ascHJyUvoc3vTPP/+Ibt26CQDS6zl+/Lj02dvb2wtfX1+pf3JysqhZs6YwMTERU6ZMEX/99ZfYunWrGD58uPj777/zfH8TEhJEixYthK2trTh9+rT0WH5+fkJfX1+MHDlShIWFiQ0bNojq1asLGxsbERsbK4QQ4sqVK6JZs2bC1tZWKZ8QQmzcuFEAEMOGDRPh4eFi//79YsmSJcLf3/+9722fPn3EihUrREREhIiIiBBTp04VRkZGYvLkyUr9JkyYIACILl26iM2bN4vw8HAxd+5cMWHCBKmPm5ubsLS0FFWrVhVLliwRERERYsiQIQKAWLNmjdTvxYsXom7dusLKykrMnTtX7N+/X/zyyy/CzMxMtGzZUmRnZwshhDh+/LgwMjISHTp0kF7vlStXhBBCxMTESJ9/SEiI2L9/v5g6daowMDAQffr0kZ5L3fcmP/d72/6bs7/evn1barO3txdlypQRlSpVEitXrhQHDx4UJ0+efOvz+/r6ChMTE6W2nGOPk5OT+P3338Vff/0lunfvLgCIyMhIIYQQcXFxIigoSAAQv/32m/S+xcXFCSGEWLt2rZDJZKJTp05i27ZtYvfu3cLT01Po6uqK/fv353pt9vb2YsyYMSIiIkLs2LEj3zny+xmlpaWJsLAwAUD0799fyvvPP/+89b05ePCgACDKly+f65hQsmRJ8e+//0p9N2/eLCZOnCi2b98uIiMjRWhoqHBzcxOlS5cWT548yfWZlS1bVgwcOFD8+eefYsuWLSIrK+utOd6876lTp4QQQqSnpwtbW1vRq1cvpX6ZmZnCzs5OdO/eXWrL+Zmxs7MTv/76q/jrr7+Ev7+/ACCGDh0q9fuQY9vrGd93DM1Lfr9vOnfuLM
qXLy8UCoXS/UePHi3kcrl4+vSpECLv/WLy5MlCLpcLHx8f6X45x8+KFSuKdu3aiR07dogdO3YIZ2dnYW5uLhITE6W
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre Total de tickets achetés sur Internet par Type d'évènements\n",
"\n",
"nb_tickets_internet = customer.groupby('name_event_types')['nb_tickets_internet'].sum()\n",
"nb_tickets_internet.plot(kind='bar', figsize=(8, 5))\n",
"plt.xlabel(\"Type d'évènements\")\n",
"plt.ylabel('Nombre Total de tickets achetés sur Internet')\n",
"plt.title(\"Nombre Total de tickets achetés sur Internet par Type d'évènements\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc071992-cf4d-4b9f-9c3b-3f0e98e20eff",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "4f9561a9-6a94-434e-b8e7-9b708f5b5529",
"metadata": {},
"source": [
"# 3 - Caractéristiques Démographiques (peu exploitable)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "e50e2583-4b8f-478e-87ac-591dde200af8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
" 'event_type_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
" 'name_event_types', 'avg_amount', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open'],\n",
" dtype='object')"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer.columns"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c724a315-9fe8-4874-be8f-a8115b17b5e2",
"metadata": {},
"outputs": [],
"source": [
"def percent_of_na(df, column):\n",
" na_percentage = df[column].isna().mean() * 100\n",
" non_na_percentage = 100 - na_percentage\n",
" \n",
" labels = ['Valeurs Manquantes', 'Non-Valeurs Manquantes']\n",
" sizes = [na_percentage, non_na_percentage]\n",
" colors = ['#ff9999','#66b3ff']\n",
" explode = (0.1, 0)\n",
"\n",
" plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)\n",
" plt.axis('equal') \n",
" plt.title('Pourcentage de Valeurs Manquantes : {}'.format(column))\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "58af5dcb-673e-4f4d-ad5c-f66ce1e8a22c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAGZCAYAAACt2rSnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeR0lEQVR4nO3dd3wUdf7H8demVyCBFAgJPfSQIL2HckGKCqKcIl1PBdspotypoGLDH5az4akU9Q48FVBQEKQJUpVu6FU6AUIJhLT5/TEmZkkC6ZPdvJ+PRx6ws7Mzn91sNu985zufsRmGYSAiIiIiheJidQEiIiIijkxhSkRERKQIFKZEREREikBhSkRERKQIFKZEREREikBhSkRERKQIFKZEREREikBhSkRERKQIFKZEREREikBhyglNnz4dm82W9eXm5kb16tUZPnw4R48etbq8UvXyyy8zd+5cq8sotIMHD2Kz2Zg+fXqRt/XNN99gs9mYMmVKnussXrwYm83GG2+8ke/tDhs2jJo1axa5vtK2fPnyrJ+RvF7frl27YrPZHPL5lYTVq1czYcIEEhMTrS6lTFmyZAktWrTA19cXm81myWeOo/4cOguFKSc2bdo01qxZw+LFi7nvvvuYOXMmHTt2JCkpyerSSo2jh6ni1Lt3b0JDQ5k6dWqe60ybNg13d3cGDx5cipVZy9/fn08++STH8gMHDrB8+XIqVKhgQVVl0+rVq3n++ecVprIxDIM777wTd3d3vv32W9asWUPnzp1LvY5nn32WOXPmlPp+xaQw5cSaNGlCmzZtiI2NZfz48YwdO5YDBw6Uari4cuUKuvxj2eDm5saQIUPYsGED27dvz3F/YmIic+bM4ZZbbiEoKMiCCgsmNTWVtLS0Im9n4MCBrFq1ij179tgtnzp1KmFhYbRv377I+xDHYRgGV65cyff6x44d4+zZs/Tr149u3brRpk0bAgICSrDC3NWpU4eYmJhS36+YFKbKkTZt2gBw6NAhAJKTkxk3bhy1atXCw8ODsLAwRo8eneOvTpvNxoQJE3Jsr2bNmgwbNizrdubhxUWLFjFixAiCgoLw8fHh6tWrAPz3v/+lbdu2+Pn54efnR3R0dI4RgR9//JFu3bpRoUIFfHx8aN++PUuWLLFbZ8KECdhsNn777TfuuusuKlasSEhICCNGjOD8+fN2dSclJTFjxoyswzldunQB4PTp04waNYpGjRrh5+dHcHAwXbt2ZeXKlTme55EjRxgwYAD+/v5UqlSJQYMGsWHDhlwPD/3yyy/ccsstBAYG4uXlRUxMDP/73//y/J5kd+zYMe688078/f2pWLEiAwcO5MSJE7muW9j9jBw5EjBHoK41c+ZMkpOTGTFiBADvvfcenTp1Ijg4GF9fX5o2bcqkSZNITU294X4Mw+D9998nOjoab29vAgICGDBgAPv377db79r3UKYuXbpkfa/gz0Nyn332GU888QRhYWF4enqyd+9eLl++zJgxY6hVqxZeXl4EBgbSokULZs6cecM6AXr06EF4eLjdiF1GRgYzZsxg6NChuLjk/JjM72vTpUsXmjRpwoYNG+jYsSM+Pj7Url2bV199lYyMDLt1d+7cSc+ePfHx8aFKlSo88MADzJs3D5vNxvLlywv8miUnJ/PEE08QHR1NxYoVCQwMpG3btnzzzTc5Hmuz2XjooYf47LPPaNiwIT4+PjRr1oz58+dnrTNhwgSefPJJAGrVqpX1M5W9ti+++IK2bdvi6+uLn58fcXFxbNq0yW5f+/fv569//SvVqlXD09OTkJAQunXrxubNm3PUVVjDhg3Dz8+P3377jW7duuHr60tQUBAPPfQQly9fzvW5T5kyhYYNG+Lp6cmMGTMAWLVqFd26dcPf3x8fHx/atWvHd999Z/eaVK9eHYCnnnoqxyHhPXv2cPfddxMcHIynpycNGzbkvffes9t/RkYGEydOpH79+nh7e1OpUiWioqJ4++23s9
Y5ffo0f/vb3wgPD8fT05OgoCDat2/Pjz/+aPecrz3Ml9/P+Jo1a9KnTx8WLlxI8+bN8fb2pkGDBtcdxRZ7blYXIKVn7969AAQFBWEYBrfddhtLlixh3LhxdOzYka1btzJ+/HjWrFnDmjVr8PT0LNR+RowYQe/evfnss89ISkrC3d2d5557jhdffJH+/fvzxBNPULFiRbZv354V7AA+//xzhgwZwq233sqMGTNwd3fnww8/JC4ujh9++IFu3brZ7ef2229n4MCBjBw5km3btjFu3DiArA+ANWvW0LVrV2JjY3n22WcBsg7ZnD17FoDx48cTGhrKpUuXmDNnDl26dGHJkiVZv5SSkpKIjY3l7NmzvPbaa9StW5eFCxcycODAHM972bJl9OzZk9atWzNlyhQqVqzIrFmzGDhwIJcvX871F2CmK1eu0L17d44dO8Yrr7xCZGQk3333XbHvJzIykg4dOvD555/z6quv4u7unnXftGnTCAsLIy4uDoB9+/Zx9913Z30Qb9myhZdeeomdO3fe8EP2/vvvZ/r06TzyyCO89tprnD17lhdeeIF27dqxZcsWQkJCrvv4vIwbN462bdsyZcoUXFxcCA4O5vHHH+ezzz5j4sSJxMTEkJSUxPbt2zlz5ky+tuni4sKwYcP45JNPmDhxIq6urixatIgjR44wfPhwHn300RyPKchrc+LECQYNGsQTTzzB+PHjmTNnDuPGjaNatWoMGTIEgJMnT9K5c2fc3d15//33CQkJ4T//+Q8PPfRQoV4ngKtXr3L27FnGjBlDWFgYKSkp/Pjjj/Tv359p06Zl7TvTd999x4YNG3jhhRfw8/Nj0qRJ9OvXj127dlG7dm3uvfdezp49yzvvvMPs2bOpWrUqAI0aNQLMQ+rPPPMMw4cP55lnniElJYXXX3+djh07sn79+qz1evXqRXp6OpMmTSIiIoKEhARWr159w0OHBw8epFatWgwdOjRfcwhTU1Pp1asX999/P08//TSrV69m4sSJHDp0iHnz5tmtO3fuXFauXMlzzz1HaGgowcHBrFixgh49ehAVFcUnn3yCp6cn77//Pn379mXmzJkMHDiQe++9l2bNmtG/f38efvhh7r777qzPzfj4eNq1a0dERASTJ08mNDSUH374gUceeYSEhATGjx8PwKRJk5gwYQLPPPMMnTp1IjU1lZ07d9q9HoMHD2bjxo289NJLREZGkpiYyMaNG6/7Hi/oZ/yWLVt44oknePrppwkJCeHjjz9m5MiR1K1bl06dOt3w9S73DHE606ZNMwBj7dq1RmpqqnHx4kVj/vz5RlBQkOHv72+cOHHCWLhwoQEYkyZNsnvsF198YQDGv//976xlgDF+/Pgc+6lRo4YxdOjQHPsdMmSI3Xr79+83XF1djUGDBuVZc1JSkhEYGGj07dvXbnl6errRrFkzo1WrVlnLxo8fn2vto0aNMry8vIyMjIysZb6+vnY15iUtLc1ITU01unXrZvTr1y9r+XvvvWcAxoIFC+zWv//++w3AmDZtWtayBg0aGDExMUZqaqrdun369DGqVq1qpKen57n/Dz74wACMb775xm75fffdV6z7MYw/v0+zZ8/OWrZ9+3YDMP75z3/m+pj09HQjNTXV+PTTTw1XV1fj7NmzWfcNHTrUqFGjRtbtNWvWGIAxefJku238/vvvhre3tzF27NisZde+hzJ17tzZ6Ny5c9btZcuWGYDRqVOnHOs2adLEuO222677nHOTuc0vv/zS2L9/v2Gz2Yz58+cbhmEYd9xxh9GlSxfDMAyjd+/eds/vWtd7bTp37mwAxrp16+we06hRIyMuLi7r9lNPPWXYbDZj8+bNduv16NHDAIxly5ZlLcvva3atzPf4yJEjjZiYGLv7ACMkJMS4cOFC1rITJ04YLi4uxiuvvJK17PXXXzcA48CBA3aPP3z4sOHm5mY8/PDDdssvXrxohIaGGnfeeadhGIaRkJBgAMZbb72VZ515OXjwoO
Hq6mqMGDHihusOHTrUAIy3337bbvlLL71kAMaqVauylgFGxYoV7b5vhmEYbdq0MYKDg42LFy9mLUtLSzOaNGliVK9
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"percent_of_na(customer, 'profession')"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "cc3437f7-8b36-4398-9da6-ff15e8e4c8d7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAm8AAAGZCAYAAADfFdYRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbwUlEQVR4nO3dd1gUV9sG8HtZll6kN+kKdgG7KPZgjzXGGLEl0WhiisbyJpaoiRGj6VHfvIotscX2xRa7SZSIJVawImIBREBFOux8f4xsXJqLArPl/l3XXsnOzs48s+Dh3jlzzsgEQRBARERERDrBSOoCiIiIiEhzDG9EREREOoThjYiIiEiHMLwRERER6RCGNyIiIiIdwvBGREREpEMY3oiIiIh0CMMbERERkQ5heCMiIiLSIQxvVKGVK1dCJpOpHsbGxqhduzZGjRqFO3fuSF1ejfr888+xbds2qct4bgkJCZDJZFi5cuULb2v79u2QyWRYunRpuevs27cPMpkMixcv1ni7I0eOhI+PzwvXV9MOHz6s+jdS3ufbuXNnyGQynTy+6nDs2DHMnj0bDx48kLqUCs2ePRsymUzqMojUMLyRRqKiohAdHY19+/bhzTffxLp169C+fXtkZWVJXVqN0fXwVpV69eoFV1dXrFixotx1oqKioFAoMHz48BqsTFrW1tZYvnx5qeU3btzA4cOHYWNjI0FV2unYsWP49NNPtT68EWkjhjfSSKNGjdC6dWt06tQJs2bNwpQpU3Djxo0aDTM5OTngrXi1g7GxMSIiInDixAlcuHCh1OsPHjzA1q1b0bdvXzg5OUlQYeUUFBSgsLDwhbczZMgQ/PXXX7h69ara8hUrVsDDwwOhoaEvvA8iIoY3ei6tW7cGANy8eRMAkJubi+nTp8PX1xcmJibw8PDAhAkTSn2rlslkmD17dqnt+fj4YOTIkarnxd21e/fuxejRo+Hk5AQLCwvk5eUBAH755Re0adMGVlZWsLKyQlBQUKkzHvv370eXLl1gY2MDCwsLhIaG4sCBA2rrFHeJXLx4EUOHDoWtrS1cXFwwevRoPHz4UK3urKwsrFq1StU91rFjRwBAamoqxo8fjwYNGsDKygrOzs7o3Lkz/vzzz1LHefv2bQwaNAjW1taoVasWhg0bhhMnTpTZ3Xby5En07dsX9vb2MDMzQ3BwMDZu3Fjuz+Rpd+/exSuvvAJra2vY2tpiyJAhSE5OLnPd593PmDFjAIhn2Epat24dcnNzMXr0aADADz/8gLCwMDg7O8PS0hKNGzdGZGQkCgoKnrkfQRDw448/IigoCObm5rCzs8OgQYMQHx+vtl7J36FiHTt2VP2sgH+7ONesWYNJkybBw8MDpqamuHbtGrKzszF58mT4+vrCzMwM9vb2aN68OdatW/fMOgGgW7du8PT0VDsjqVQqsWrVKowYMQJGRqWbXE0/m44dO6JRo0Y4ceIE2rdvDwsLC/j5+eGLL76AUqlUW/fSpUvo3r07LCws4OjoiHHjxuG3336DTCbD4cOHK/2Z5ebmYtKkSQgKCoKtrS3s7e3Rpk0bbN++vdR7ZTIZ3nnnHaxZswb169eHhYUFmjZtih07dqjWmT17Nj766CMAgK+vr+rf1NO1bdiwAW3atIGlpSWsrKwQHh6Of/75R21f8fHxePXVV+Hu7g5TU1O4uLigS5cuOHPmTKm6qtKGDRvw0ksvwc3NDebm5qhfvz6mTZtWqidi5MiRsLKywrVr19CzZ09YWVnB09MTkyZNUrVlxTRtG0r+bJ7eV8ku+U8//RStWrWCvb09bGxsEBISguXLl5f6EpyXl4dJkybB1dUVFhYWCAsLw6lTp8r8/UhOTsbYsWNRu3ZtmJiYwNfXF59++mmVfPkhzRlLXQDppmvXrgEAnJycIAgC+vXrhwMHDmD69Olo3749zp07h1mzZiE6OhrR0dEwNTV9rv2MHj0avXr1wpo1a5CVlQWFQoGZM2di7t
y5GDBgACZNmgRbW1tcuHBBFSQBYO3atYiIiMDLL7+MVatWQaFQYNmyZQgPD8fvv/+OLl26qO1n4MCBGDJkCMaMGYPz589j+vTpAKD6IxwdHY3OnTujU6dOmDFjBgCousDS09MBALNmzYKrqyseP36MrVu3omPHjjhw4ICqoc3KykKnTp2Qnp6OBQsWoE6dOtizZw+GDBlS6rgPHTqE7t27o1WrVli6dClsbW2xfv16DBkyBNnZ2WX+wS2Wk5ODrl274u7du5g/fz4CAgKwc+fOKt9PQEAA2rVrh7Vr1+KLL76AQqFQvRYVFQUPDw+Eh4cDAK5fv47XXntNFe7Pnj2Lzz77DJcuXaqw6xUAxo4di5UrV2LixIlYsGAB0tPTMWfOHLRt2xZnz56Fi4tLhe8vz/Tp09GmTRssXboURkZGcHZ2xocffog1a9Zg3rx5CA4ORlZWFi5cuIC0tDSNtmlkZISRI0di+fLlmDdvHuRyOfbu3Yvbt29j1KhReO+990q9pzKfTXJyMoYNG4ZJkyZh1qxZ2Lp1K6ZPnw53d3dEREQAAFJSUtChQwcoFAr8+OOPcHFxwc8//4x33nnnuT4nQPzjnp6ejsmTJ8PDwwP5+fnYv38/BgwYgKioKNW+i+3cuRMnTpzAnDlzYGVlhcjISPTv3x+XL1+Gn58f3njjDaSnp+O7777Dli1b4ObmBgBo0KABAPEShU8++QSjRo3CJ598gvz8fCxcuBDt27dHTEyMar2ePXuiqKgIkZGR8PLywv3793Hs2LFndsUmJCTA19cXI0aMeK5rQK9evYqePXvi/fffh6WlJS5duoQFCxYgJiYGBw8eVFu3oKAAffv2xZgxYzBp0iT88ccfmDt3LmxtbTFz5kwAlWsbKiMhIQFjx46Fl5cXAODvv//Gu+++izt37qj2DQCjRo3Chg0bMGXKFHTu3BmxsbHo378/Hj16pLa95ORktGzZEkZGRpg5cyb8/f0RHR2NefPmISEhocwvclRNBKIKREVFCQCEv//+WygoKBAyMzOFHTt2CE5OToK1tbWQnJws7NmzRwAgREZGqr13w4YNAgDhv//9r2oZAGHWrFml9uPt7S2MGDGi1H4jIiLU1ouPjxfkcrkwbNiwcmvOysoS7O3thT59+qgtLyoqEpo2bSq0bNlStWzWrFll1j5+/HjBzMxMUCqVqmWWlpZqNZansLBQKCgoELp06SL0799ftfyHH34QAAi7d+9WW3/s2LECACEqKkq1rF69ekJwcLBQUFCgtm7v3r0FNzc3oaioqNz9L1myRAAgbN++XW35m2++WaX7EYR/f05btmxRLbtw4YIAQPj444/LfE9RUZFQUFAgrF69WpDL5UJ6errqtREjRgje3t6q59HR0QIAYdGiRWrbuHXrlmBubi5MmTJFtazk71CxDh06CB06dFA9P3TokABACAsLK7Vuo0aNhH79+lV4zGUp3uamTZuE+Ph4QSaTCTt27BAEQRAGDx4sdOzYURAEQejVq5fa8ZVU0WfToUMHAYBw/Phxtfc0aNBACA8PVz2fOnWqIJPJhDNnzqit161bNwGAcOjQIdUyTT+zkop/x8eMGSMEBwervQZAcHFxER49eqRalpycLBgZGQnz589XLVu4cKEAQLhx44ba+xMTEwVjY2Ph3XffVVuemZkpuLq6Cq+88oogCIJw//59AYDw9ddfl1tneRISEgS5XC6MHj36mesWtxHlUSqVQkFBgXDkyBEBgHD27FnVayNGjBAACBs3blR7T8+ePYXAwEDV88q0DeX9bEr+2ymp+Hdrzpw5goODg6ptu3jxogBAmDp1qtr669atEwCo/X6MHTtWsLKyEm7evKm27pdffikAEC5evFju/qlqsduUNNK6dWsoFApYW1ujd+/ecHV1xe7du+Hi4qL6plnyLM3gwYNhaWlZqquyMgYOHKj2fN++fSgqKsKECRPKfc+xY8eQnp6OESNGoLCwUPVQKpXo3r07Tp
w4Uap7o2/fvmrPmzRpgtzcXNy7d0+jOpcuXYqQkBCYmZnB2NgYCoUCBw4cQFxcnGqdI0eOwNraGt27d1d779ChQ9W
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"percent_of_na(customer, 'language')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c34164d2-5ab2-4923-a165-30dc5c070233",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}