BDC-team-1/useless/Temporary_barplot_example_TP.ipynb

959 lines
126 KiB
Plaintext
Raw Normal View History

2024-03-02 09:58:05 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "08977396-ae9a-4c48-9890-e2d3f9bf5c0e",
"metadata": {},
"source": [
"# TP : graphique barplot - nombre d'achats par mois"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "225af1ed-6dcd-4116-99d1-f649dfa8f96f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import matplotlib.dates as mdates\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7fe35156-ea0b-4f9b-b981-1231e26b1baf",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e0a09bf5-5a96-40c2-93be-ba0a6a130266",
"metadata": {},
"outputs": [],
"source": [
"## Evolution vente \n",
"\n",
"# Importation\n",
"# Chargement des données temporaires\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Temp/Company 1 - Purchases.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" purchases = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "0c686793-b760-4013-9f79-f2eeee86cafb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>type_of_ticket_name</th>\n",
" <th>amount</th>\n",
" <th>children</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>8.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826667</th>\n",
" <td>20662815</td>\n",
" <td>1256135</td>\n",
" <td>8007697</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 17:23:54+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826668</th>\n",
" <td>20662816</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826669</th>\n",
" <td>20662817</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826670</th>\n",
" <td>20662818</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826671</th>\n",
" <td>20662819</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1826672 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 13070859 48187 5107462 4 vente en ligne \n",
"1 13070860 48187 5107462 4 vente en ligne \n",
"2 13070861 48187 5107462 4 vente en ligne \n",
"3 13070862 48187 5107462 4 vente en ligne \n",
"4 13070863 48187 5107462 4 vente en ligne \n",
"... ... ... ... ... ... \n",
"1826667 20662815 1256135 8007697 5 vente en ligne \n",
"1826668 20662816 1256136 8007698 5 vente en ligne \n",
"1826669 20662817 1256136 8007698 5 vente en ligne \n",
"1826670 20662818 1256137 8007699 5 vente en ligne \n",
"1826671 20662819 1256137 8007699 5 vente en ligne \n",
"\n",
" purchase_date type_of_ticket_name amount \\\n",
"0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
"1 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"2 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"3 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"4 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"... ... ... ... \n",
"1826667 2023-11-08 17:23:54+00:00 Atelier 11.0 \n",
"1826668 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826669 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826670 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"1826671 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"\n",
" children is_full_price name_event_types name_facilities \\\n",
"0 pricing_formula False spectacle vivant mucem \n",
"1 pricing_formula False spectacle vivant mucem \n",
"2 pricing_formula False spectacle vivant mucem \n",
"3 pricing_formula False spectacle vivant mucem \n",
"4 pricing_formula False spectacle vivant mucem \n",
"... ... ... ... ... \n",
"1826667 pricing_formula False offre muséale groupe mucem \n",
"1826668 pricing_formula False offre muséale groupe mucem \n",
"1826669 pricing_formula False offre muséale groupe mucem \n",
"1826670 pricing_formula False offre muséale groupe mucem \n",
"1826671 pricing_formula False offre muséale groupe mucem \n",
"\n",
" name_categories name_events name_seasons \n",
"0 indiv prog enfant l'école des magiciens 2018 \n",
"1 indiv prog enfant l'école des magiciens 2018 \n",
"2 indiv prog enfant l'école des magiciens 2018 \n",
"3 indiv prog enfant l'école des magiciens 2018 \n",
"4 indiv prog enfant l'école des magiciens 2018 \n",
"... ... ... ... \n",
"1826667 indiv entrées tp NaN 2023 \n",
"1826668 indiv entrées tp NaN 2023 \n",
"1826669 indiv entrées tp NaN 2023 \n",
"1826670 indiv entrées tp NaN 2023 \n",
"1826671 indiv entrées tp NaN 2023 \n",
"\n",
"[1826672 rows x 15 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "84a11cdd-aeb9-457a-bf7b-b2ad1752b99d",
"metadata": {},
"outputs": [],
"source": [
"purchases['purchase_date'] = pd.to_datetime(purchases['purchase_date'])\n",
"\n",
"purchases_filtered = purchases[purchases['event_type_id'] == 5]"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "bea0e516-ee62-4bb4-bdd9-bb2502972d84",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>fake_category</th>\n",
" <th>purchase_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2013-06-01</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2013-07-01</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2013-09-01</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2013-10-01</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2013-11-01</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>2023-09-01</td>\n",
" <td>1</td>\n",
" <td>6900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>2023-10-01</td>\n",
" <td>0</td>\n",
" <td>3621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>2023-10-01</td>\n",
" <td>1</td>\n",
" <td>8313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>2023-11-01</td>\n",
" <td>0</td>\n",
" <td>945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200</th>\n",
" <td>2023-11-01</td>\n",
" <td>1</td>\n",
" <td>2268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>201 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" month fake_category purchase_id\n",
"0 2013-06-01 1 1\n",
"1 2013-07-01 1 1\n",
"2 2013-09-01 0 2\n",
"3 2013-10-01 1 1\n",
"4 2013-11-01 0 2\n",
".. ... ... ...\n",
"196 2023-09-01 1 6900\n",
"197 2023-10-01 0 3621\n",
"198 2023-10-01 1 8313\n",
"199 2023-11-01 0 945\n",
"200 2023-11-01 1 2268\n",
"\n",
"[201 rows x 3 columns]"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# création de la table pr faire le graphique\n",
"\n",
"purchases_graph = purchases_filtered[['purchase_id', 'purchase_date']].drop_duplicates()\n",
"\n",
"purchases_graph[\"fake_category\"] = np.random.choice([0, 1], size=purchases_graph.shape[0], p = [0.3, 0.7])\n",
"\n",
"purchases_graph['month'] = purchases['purchase_date'].dt.strftime('%Y-%m')\n",
"\n",
"# purchases_graph = purchases_graph.groupby('month')['purchase_id'].count().reset_index()\n",
"purchases_graph = purchases_graph.groupby(['month','fake_category'])['purchase_id'].count().reset_index()\n",
"\n",
"purchases_graph['month'] = pd.to_datetime(purchases_graph['month'])\n",
"\n",
"purchases_graph"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "c9b70757-7b80-4e6d-99f0-58b9812f404f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4UAAAJWCAYAAADvDSKVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACjY0lEQVR4nOzdeXxU9b3/8ffsSSbJZIEkBhEBERdwQ0WwVq0LLohLLVUsLrVqa6ulxWqtXai31Z9L1V6tS9WqdaO31+VabRGtW6kogqKiFDeQNawhe2Y9vz/COcxyJskkk8mEvJ6PxzwgZ75z5nvO+Z7lM9/NYRiGIQAAAADAoOTs7wwAAAAAAPoPQSEAAAAADGIEhQAAAAAwiBEUAgAAAMAgRlAIAAAAAIMYQSEAAAAADGIEhQAwyFxxxRU65JBD1NjY2N9ZAQAAecDd3xkAAOTOo48+qvnz52vBggUqLS3t7+wAAIA84GDyegAAkCvnnXeeFi9erH//+98aMmRIf2cHACCajwLALu/hhx+Ww+FI+3rttdf67Lv33HNPXXjhhT367BNPPKE77rjD9j2Hw6E5c+b0OF/ZNmfOHDkcjv7OhiTpmGOO0THHHNPf2bB177336p///KfmzZtHQAgAeYTmowAwSDz00EPaZ599Upbvt99+/ZCbrj3xxBNatmyZZs2alfLewoULtfvuu+c+U+ixd999V7/4xS/04osvauTIkf2dHQBAHIJCABgkxo0bp0MPPbS/s5EVRxxxRH9nARk65JBDtHnz5v7OBgDABs1HAQCSpIMPPlhHHXVUyvJoNKphw4bprLPOspZt27ZNl19+uYYNGyav16tRo0bpuuuuUzAY7PQ7zKasq1atSlj+2muvJTRlPeaYY/TCCy/oyy+/TGjqarJrPrps2TKdfvrpKi8vV0FBgQ466CA98sgjtt/z5JNP6rrrrlNtba1KS0t1/PHHa8WKFd3YS9ILL7yggw46SD6fTyNHjtStt95qm84wDN1999066KCDVFhYqPLycp199tn64osvEtK99957mjp1qqqqquTz+VRbW6tTTz1Va9eu7TQfhmHo5ptv1ogRI1RQUKBDDjlE//jHP2zTrl69Wt/61res79h33331u9/9TrFYzEqzatUqORwO3Xrrrbrttts0cuRIFRcXa9KkSXrrrbcS1nfhhRequLhYn332mU455RQVFxdr+PDhmj17dkoZCIVC+s1vfqN99tlHPp9PQ4cO1UUXXWQbIP7lL3/RpEmT5Pf7VVxcrClTpui9995LSPPFF1/onHPOUW1trXw+n6qrq3Xcccdp6dKlne4vAEB61BQCwCARjUYViUQSljkcDrlcLknSRRddpB/+8If69NNPNWbMGCvN/PnztX79el100UWSpPb2dh177LH6/PPP9etf/1oHHHCA/vWvf+nGG2/U0qVL9cILL/Q6r3fffbcuvfRSff7553rmmWe6TL9ixQpNnjxZVVVV+u///m9VVlbqscce04UXXqiNGzfq6quvTkj/s5/9TEceeaQeeOABNTY26pprrtFpp52m5cuXW/vDzj//+U+dfvrpmjRpkubOnatoNKqbb75ZGzduTEl72WWX6eGHH9aVV16pm266Sdu2bdP111+vyZMn6/3331d1dbVaWlp0wgknaOTIkfrDH/6g6upq1dXV6dVXX1VTU1On2/zrX/9av/71r3XxxRfr7LPP1po1a3TJJZcoGo1q7NixVrrNmzdr8uTJCoVC+q//+i/tueeeev7553XVVVfp888/1913352w3j/84Q/aZ599rP6cv/jFL3TKKado5cqVCgQCVrpwOKxp06bp4osv1uzZs/XGG2/ov/7rvxQIBPTLX/5SkhSLxXT66afrX//6l66++mpNnjxZX375pX71q1/pmGOO0eLFi1VYWChJuuGGG/Tzn/9cF110kX7+858rFArplltu0VFHHaVFixZZzZxPOeUUa7/vscce2rJli958801t37690/0FAOiEAQDYpT300EOGJNuXy+Wy0m3ZssXwer3Gz372s4TPT58+3aiurjbC4bBhGIZx7733GpKM//mf/0lId9NNNxmSjPnz51vLRowYYVxwwQUpeVm5cmXCZ1999VVDkvHqq69ay0499VRjxIgRttskyfjVr35l/X3OOecYPp/PWL16dUK6k08+2SgqKjK2b9+e8D2nnHJKQrr/+Z//MSQZCxcutP0+08SJE43a2lqjra3NWtbY2GhUVFQY8bfUhQsXGpKM3/3udwmfX7NmjVFYWGhcffXVhmEYxuLFiw1JxrPPPtvp9yarr683CgoKjDPPPDNh+b///W9DknH00Udby376058akoy33347Ie33vvc9w+FwGCtWrDAMwzBWrlxpSDLGjx9vRCIRK92iRYsMScaTTz5pLbvgggtsy8App5xijB071vr7ySefNCQZTz31VEK6d955x5Bk3H333YZhGMbq1asNt9ttXHHFFQnpmpqajJqaGmP69OmGYXSUUUnGHXfc0a39BADoHpqPAsAg8ec//1nvvPNOwuvtt9+23q+srNRpp52mRx55xGpWWF9fr//7v//T+eefL7e7o3HJK6+8Ir/fr7PPPjth/eYoo//85z9zs0FxXnnlFR133HEaPnx4Sp5aW1u1cOHChOXTpk1L+PuAAw6QJH355Zdpv6OlpUXvvPOOzjrrLBUUFFjLS0pKdNpppyWkff755+VwOPStb31LkUjEetXU1OjAAw+0msnutddeKi8v1zXXXKN7771XH3/8cbe2d+HChWpvb9d5552XsHzy5MkaMWJEwrJXXnlF++23nw4//PCE5RdeeKEMw9Arr7ySsPzUU09NqC1Nt28cDkfKdh9wwAEJ6Z5//nmVlZXptNNOS9gPBx10kGpqaqz98OKLLyoSiej8889PSFdQUKCjjz7aSldRUaHRo0frlltu0W233ab33nsvoQksAKBnCAoBYJDYd999deihhya8JkyYkJDm29/+ttatW6eXXnpJkvTkk08qGAwmTCuxdetW1dTUpEzBUFVVJbfbra1bt/b5tiTbunWrdtttt5TltbW11vvxKisrE/72+XySpLa2trTfUV9fr1gsppqampT3kpdt3LhRhmGourpaHo8n4fXWW29py5YtkqRAIKDXX39dBx10kH72s59p//33V21trX71q18pHA53ur1232u3rK/2TVFRUUJwbKZtb29P2A/bt2+X1+tN2Q91dXXWfjCb3x522GEp6f7yl79Y6RwOh/75z39qypQpuvnmm3XIIYdo6NChuvLKK7tsbgsASI8+hQAAy5QpU1RbW6uHHnpIU6ZM0UMPPaSJEycmTFtRWVmpt99+W4ZhJASGmzZtUiQS6XT+OTOISB6MxHzo76nKykpt2LAhZfn69eslKStz4pWXl8vhcKiuri7lveRlQ4YMkcPh0L/+9S8rqIoXv2z8+PGaO3euDMPQBx98oIcffljXX3+9CgsL9dOf/tQ2L2bgli4ve+65Z0Lavt436QwZMkSVlZWaN2+e7fslJSUJefjf//3flJrOZCNGjNCDDz4oSfrkk0/0P//zP5ozZ45CoZDuvffeLOYeAAYPagoBABaXy6WZM2fq2Wef1b/+9S8tXrxY3/72txPSHHfccWpubtazzz6bsPzPf/6z9X46ZrDywQcfJCx/7rnnUtL6fL5Oa+6S8/TKK69YgU58noqKirIyhYXf79fhhx+up59+OqE2rKmpSX/7298S0k6dOlWGYWjdunUptbOHHnqoxo8fn7J+h8OhAw88ULfffrvKysr07rvvps3LEUccoYKCAj3++OMJy998882UZp7HHXecPv7445T1/fnPf5bD4dCxxx7b7X2QqalTp2rr1q2KRqO2+8EcEGfKlClyu936/PPPbdOlm0pl77331s9//nONHz++0/0FAOgcNYUAMEgsW7YsZfRRSRo9erSGDh1q/f3tb39bN910k2bMmKHCwkJ985vfTEh//vnn6w9/+IMuuOACrVq1SuPHj9eCBQt0ww036JRTTtHxxx+fNg+HHXaYxo4dq6uuukqRSETl5eV
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Créer le graphique (a changé ! le bon est le barplot qui vient après)\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(purchases_graph['month'], purchases_graph['purchase_id'])\n",
"\n",
"# Définir le format de l'axe des x en fonction des dates\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))\n",
"plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=5)) # Ajustez l'intervalle selon vos besoins\n",
"\n",
"# Rotation des étiquettes de l'axe x pour une meilleure lisibilité\n",
"plt.xticks(rotation=45)\n",
"\n",
"\n",
"# Titres et labels\n",
"plt.title('Évolution des données')\n",
"plt.xlabel('Date')\n",
"plt.ylabel('Valeurs')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "0561564e-2c74-4d99-9aa3-26099160520e",
"metadata": {},
"source": [
"## TP : second graphique - barplot"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "1753d45c-2737-4082-a5b0-461071a03351",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>fake_category</th>\n",
" <th>purchase_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>2019-03-01</td>\n",
" <td>1</td>\n",
" <td>102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>100</th>\n",
" <td>2019-05-01</td>\n",
" <td>1</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>2019-06-01</td>\n",
" <td>1</td>\n",
" <td>131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>2021-06-01</td>\n",
" <td>1</td>\n",
" <td>157</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>2021-07-01</td>\n",
" <td>1</td>\n",
" <td>145</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>2021-09-01</td>\n",
" <td>1</td>\n",
" <td>123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>2021-10-01</td>\n",
" <td>1</td>\n",
" <td>220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>2022-03-01</td>\n",
" <td>1</td>\n",
" <td>112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>2022-04-01</td>\n",
" <td>1</td>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>164</th>\n",
" <td>2022-05-01</td>\n",
" <td>1</td>\n",
" <td>164</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>2022-06-01</td>\n",
" <td>1</td>\n",
" <td>158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172</th>\n",
" <td>2022-09-01</td>\n",
" <td>1</td>\n",
" <td>178</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174</th>\n",
" <td>2022-10-01</td>\n",
" <td>1</td>\n",
" <td>218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>176</th>\n",
" <td>2022-11-01</td>\n",
" <td>1</td>\n",
" <td>137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>2022-12-01</td>\n",
" <td>1</td>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>2023-01-01</td>\n",
" <td>0</td>\n",
" <td>2052</td>\n",
" </tr>\n",
" <tr>\n",
" <th>180</th>\n",
" <td>2023-01-01</td>\n",
" <td>1</td>\n",
" <td>5079</td>\n",
" </tr>\n",
" <tr>\n",
" <th>181</th>\n",
" <td>2023-02-01</td>\n",
" <td>0</td>\n",
" <td>2684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>182</th>\n",
" <td>2023-02-01</td>\n",
" <td>1</td>\n",
" <td>6350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>183</th>\n",
" <td>2023-03-01</td>\n",
" <td>0</td>\n",
" <td>2196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184</th>\n",
" <td>2023-03-01</td>\n",
" <td>1</td>\n",
" <td>5304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>185</th>\n",
" <td>2023-04-01</td>\n",
" <td>0</td>\n",
" <td>3595</td>\n",
" </tr>\n",
" <tr>\n",
" <th>186</th>\n",
" <td>2023-04-01</td>\n",
" <td>1</td>\n",
" <td>8563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>187</th>\n",
" <td>2023-05-01</td>\n",
" <td>0</td>\n",
" <td>3727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>188</th>\n",
" <td>2023-05-01</td>\n",
" <td>1</td>\n",
" <td>8653</td>\n",
" </tr>\n",
" <tr>\n",
" <th>189</th>\n",
" <td>2023-06-01</td>\n",
" <td>0</td>\n",
" <td>2904</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190</th>\n",
" <td>2023-06-01</td>\n",
" <td>1</td>\n",
" <td>6641</td>\n",
" </tr>\n",
" <tr>\n",
" <th>191</th>\n",
" <td>2023-07-01</td>\n",
" <td>0</td>\n",
" <td>4247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>192</th>\n",
" <td>2023-07-01</td>\n",
" <td>1</td>\n",
" <td>10022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>193</th>\n",
" <td>2023-08-01</td>\n",
" <td>0</td>\n",
" <td>6146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>194</th>\n",
" <td>2023-08-01</td>\n",
" <td>1</td>\n",
" <td>14593</td>\n",
" </tr>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>2023-09-01</td>\n",
" <td>0</td>\n",
" <td>2954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>2023-09-01</td>\n",
" <td>1</td>\n",
" <td>6900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>2023-10-01</td>\n",
" <td>0</td>\n",
" <td>3621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>2023-10-01</td>\n",
" <td>1</td>\n",
" <td>8313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>2023-11-01</td>\n",
" <td>0</td>\n",
" <td>945</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200</th>\n",
" <td>2023-11-01</td>\n",
" <td>1</td>\n",
" <td>2268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month fake_category purchase_id\n",
"96 2019-03-01 1 102\n",
"100 2019-05-01 1 140\n",
"102 2019-06-01 1 131\n",
"142 2021-06-01 1 157\n",
"144 2021-07-01 1 145\n",
"148 2021-09-01 1 123\n",
"150 2021-10-01 1 220\n",
"160 2022-03-01 1 112\n",
"162 2022-04-01 1 107\n",
"164 2022-05-01 1 164\n",
"166 2022-06-01 1 158\n",
"172 2022-09-01 1 178\n",
"174 2022-10-01 1 218\n",
"176 2022-11-01 1 137\n",
"178 2022-12-01 1 107\n",
"179 2023-01-01 0 2052\n",
"180 2023-01-01 1 5079\n",
"181 2023-02-01 0 2684\n",
"182 2023-02-01 1 6350\n",
"183 2023-03-01 0 2196\n",
"184 2023-03-01 1 5304\n",
"185 2023-04-01 0 3595\n",
"186 2023-04-01 1 8563\n",
"187 2023-05-01 0 3727\n",
"188 2023-05-01 1 8653\n",
"189 2023-06-01 0 2904\n",
"190 2023-06-01 1 6641\n",
"191 2023-07-01 0 4247\n",
"192 2023-07-01 1 10022\n",
"193 2023-08-01 0 6146\n",
"194 2023-08-01 1 14593\n",
"195 2023-09-01 0 2954\n",
"196 2023-09-01 1 6900\n",
"197 2023-10-01 0 3621\n",
"198 2023-10-01 1 8313\n",
"199 2023-11-01 0 945\n",
"200 2023-11-01 1 2268"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases_graph[purchases_graph[\"purchase_id\"]>100] "
]
},
{
"cell_type": "markdown",
"id": "4113b464-1349-4e6e-a8c0-8a327eb7ef58",
"metadata": {},
"source": [
"à partir de 2023, rupture : passage de plusieurs centaines à + de 7k ventes (et 3k en nov 2023) - on prend slt 2023"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "161efc2b-8439-4fe7-b136-cc70b9e83267",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHGCAYAAADkJDrPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4A0lEQVR4nO3dd1gUV/s38O9KWYqwUqQpIDaiwYYFARsWQMUeSzBELJjEQowQlRgFjMYHY40+icYYsaCYxBJbiAU1MYoFg12jCbYI4qMIFqSe9w9/zOuwoKBLy34/17XXxZ45c+aeYWb33jNzZhRCCAEiIiIi0ho1KjsAIiIiIqpYTACJiIiItAwTQCIiIiItwwSQiIiISMswASQiIiLSMkwAiYiIiLQME0AiIiIiLcMEkIiIiEjLMAEkIiIi0jJMAImIXuLMmTMwMjLC0qVLKzsUomrj0aNHeOONNzB48GAUFBRUdjhURJVJAKOjo6FQKGBgYIDr16+rTe/SpQtcXFwqITIgMDAQNWvWrJRlv4xCoUBERESFLrNevXrw8/PTWHtPnjxBREQEDh48qLE2SXPq1auHwMBAjbXXpUsXdOnSRWPtlbeHDx/irbfewsSJEzFx4kSNtZufn4+FCxfC19cXdevWhZGREZo0aYJp06bhwYMHxc6zdOlSvPHGG1AqlXByckJkZCRyc3NldbZs2YK3334bDRs2hKGhIerVq4fhw4fjypUrau1Nnz4drVq1grm5OQwMDFC/fn2MHTu22M/g6i4wMBD16tXTaJtLly5Fw4YNoa+vD4VCIf3fPv30Uzg4OEBXVxe1atXS6DKrk6CgIFhbW2P9+vWoUePV0o3q9nlRnVSZBLBQdnY2Pv3008oOgyrQkydPEBkZyQSQqqTRo0ejXbt2+M9//qPRdrOyshAREQFHR0csXrwYu3fvRlBQEL755ht4enoiKytLVn/OnDn48MMPMXDgQPzyyy8YN24cPv/8c4wfP15WLyoqCk+ePMH06dMRFxeH2bNn448//oCrqyvOnz8vq/vgwQO8/fbbWLNmDeLi4hAaGoqdO3fCzc0N9+7d0+j6/tskJSUhODgYXl5eiI+Px9GjR2FiYoKffvoJc+bMwbvvvotDhw5h3759lR1qpfjvf/+LM2fO4KeffoJSqazscKgYupUdQFG+vr7YsGEDQkND0aJFi8oO57UJIfD06VMYGhpWdij0L/fkyRMYGRlVdhj/Ot9//325tGtoaIjk5GRYWFhIZV26dIGDgwMGDx6MzZs345133gEA3Lt3D7Nnz0ZQUBA+//xzqW5ubi4+/fRTTJo0CU2bNgUA7NixA1ZWVrJlde3aFfXq1cOiRYvw7bffSuX//e9/ZfW6dOkCJycn9OrVCz/99BNGjRpVLuteXrKysirss7YwmQ4KCkK7du2k8nPnzgEAgoOD1f4PRVVkvBVt/Pjxaj9Oqrv8/Hzk5eX9axLaKtcDOGXKFFhYWGDq1Kkvrfv06VOEhYXByckJ+vr6qFOnDsaPH692+qTwlOXOnTvRqlUrGBoaokmTJti5cyeAZ6efmzRpAmNjY7Rr1w4nT54sdnnnz59Ht27dYGxsjNq1a2PChAl48uSJrI5CocCECROwfPlyNGnSBEqlEmvWrAEAXLlyBf7+/rCysoJSqUSTJk3UPoBLkpmZiaCgIFhYWKBmzZrw9fXFn3/+Wap5nz59ipCQELRs2RIqlQrm5uZwd3fHTz/9pFa3oKAAS5cuRcuWLWFoaIhatWqhffv22L59u1rduLg4uLq6wtDQEG+88Qa+++472fS7d+9i3LhxaNq0KWrWrAkrKyt07doVv/32m1Tn2rVrqF27NgAgMjISCoUCCoVCOuV49+5djB07Fvb29lAqlahduzY8PT1f+qv66tWrGDlyJBo1agQjIyPUqVMHffr0wdmzZ2X1Ci89uHbtmqz84MGDUCgUpeqVvHTpEt5++21YW1tDqVTCwcEB7777LrKzs6U6586dQ79+/WBmZgYDAwO0bNlS2i9eJZbCSyJ+/fVXeHh4wMjISPqyjo+PR5cuXWBhYQFDQ0M4ODhg0KBBavtqUbm5uZgyZQpsbGxgZGSEDh064Pjx48XWTU1NxXvvvYe6detCX19fOh2Zl5f30u1VWps2bYK3tzdsbW2lY3batGl4/PixrF7hJRpXr15Fr169ULNmTdjb2yMkJET2P7h27RoUCgXmz5+PhQsXwsnJCTVr1oS7uzsSEhLUln/y5En07dtXOj3aqlWrYpPBV90WOjo6suSvUGEycfPmTaksLi4OT58+xciRI2V1R44cCSEEtm3bJpUVl3TY2dmhbt26sjZLUng86uq+uH+gcHvOmzcPc+bMgYODAwwMDNCmTRvs379frf7hw4fRrVs3mJiYwMjICB4eHti1a5esTkREBBQKhdq8xR0bhZ/rW7ZsQatWrWBgYIDIyMiXrl9pfPfdd2jRogUMDAxgbm6OAQMG4OLFi9L0Ll26SMm5m5ub9JlVr1496QyWtbW17BKdF8X7OsfT636/lXSKtbhT5l9//TVatGiBmjVrwsTEBG+88QY++eQTWZ3SrktkZCTc3Nxgbm4OU1NTuLq6YtWqVRBCvHSdc3JyMHv2bOlyiNq1a2PkyJG4e/fuS+cFgJUrV6Jx48ZQKpVo2rQpNmzYoLa+z+/fs2fPhpOTE5RKJQ4cOAAA2L59O9zd3WFkZAQTExP06NEDR48efek2BIrfzwtzhxUrVshii42NVZtfY5+/oopYvXq1ACBOnDghlixZIgCI/fv3S9M7d+4s3nzzTel9QUGB8PHxEbq6umLGjBliz549Yv78+cLY2Fi0atVKPH36VKrr6Ogo6tatK1xcXMTGjRvF7t27hZubm9DT0xMzZ84Unp6eYsuWLWLr1q2icePGwtraWjx58kSaf8SIEUJfX184ODiIOXPmiD179oiIiAihq6sr/Pz8ZOsBQNSpU0c0b95cbNiwQcTHx4tz586J8+fPC5VKJZo1aybWrl0r9uzZI0JCQkSNGjVERETEC7dNQUGB8PLyEkqlUlp+eHi4qF+/vgAgwsPDXzj/gwcPRGBgoFi3bp2Ij48XcXFxIjQ0VNSoUUOsWbNGVjcgIEAoFAoxZswY8dNPP4mff/5ZzJkzRyxZskRtezZt2lSsXbtW/PLLL2Lw4MECgDh06JBU79KlS+KDDz4QsbGx4uDBg2Lnzp1i9OjRokaNGuLAgQNCCCGePn0q4uLiBAAxevRocfToUXH06FFx9epVIYQQPj4+onbt2uKbb74RBw8eFNu2bRMzZ84UsbGxL1znQ4cOiZCQEPHjjz+KQ4cOia1bt4r+/fsLQ0NDcenSJale4X6XnJwsm//AgQMCgBRnSZKSkkTNmjVFvXr1xPLly8X+/fvF+vXrxZAhQ0RmZqa0HUxMTESDBg3E2rVrxa5du8Tbb78tAIioqKhXiqVz587C3Nxc2Nvbi6VLl4oDBw6IQ4cOieTkZGFgYCB69Oghtm3bJg4ePChiYmJEQECASE9Pf+G6jBgxQigUCvHxxx+LPXv2iIULF4o6deoIU1NTMWLECKleSkqKsLe3F46OjmLFihVi37594rPPPhNKpVIEBga+cBmFsXfu3Pml9T777DOxaNEisWvXLnHw4EGxfPly4eTkJLy8vNTi1tfXF02aNBHz588X+/btEzNnzhQKhUJERkZK9ZKTkwUAUa9ePeHr6yu2bdsmtm3bJpo1aybMzMzEgwcPpLrx8fFCX19fdOzYUWzatEnExcWJwMBAAUCsXr1aY9uiOIX7wU8//SSVTZs2TQAQjx49UqtvaWkp3n777Re2+ddff4kaNWqIjz76qNjpubm54smTJ+LUqVPC09NTNG7cWDx8+PCFbRZuT3t7e9GhQwexefNm8cMPP4i2bdsKPT09ceTIEanuwYMHhZ6enmjdurXYtGmT2LZtm/D29hYKhUJ2LIeHh4vivpaKOzYcHR2Fra2tqF+/vvjuu+/EgQMHxPHjx0uMd8SIEcLR0fGF6ySEEJ9//rkAIN5++22xa9cusXbtWlG/fn2hUqnEn3/+KYQQ4vz
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# début du graphique\n",
"\n",
"purchases_graph_used = purchases_graph[purchases_graph[\"month\"] >= datetime(2023,1,1)]\n",
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"fake_category\"]==0]\n",
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"fake_category\"]==1]\n",
"\n",
"\n",
"# Création du barplot\n",
"plt.bar(purchases_graph_used_0[\"month\"], purchases_graph_used_0[\"purchase_id\"], width=12, label = \"categorie 0\")\n",
"plt.bar(purchases_graph_used_0[\"month\"], purchases_graph_used_1[\"purchase_id\"], \n",
" bottom = purchases_graph_used_0[\"purchase_id\"], width=12, label = \"categorie 1\")\n",
"\n",
"\n",
"# commande pr afficher slt\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Mois')\n",
"plt.ylabel('Nombre d achats')\n",
"plt.title('Nombre d achats au cours de l année 2023 pour l offre muséale groupe')\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}