From 11e2e8658329c2354b20219e8c1b597dcde47f9e Mon Sep 17 00:00:00 2001
From: ajoubrel-ensae <antoine.joubrel@ensae.fr>
Date: Sat, 9 Mar 2024 14:50:58 +0000
Subject: [PATCH] Ajout et sauvegarde

---
 1_Descriptive_Statistics_Museum.ipynb | 201 ++++++++++++--------------
 1 file changed, 93 insertions(+), 108 deletions(-)
diff --git a/1_Descriptive_Statistics_Museum.ipynb b/1_Descriptive_Statistics_Museum.ipynb
index 1023f39..859c41b 100644
--- a/1_Descriptive_Statistics_Museum.ipynb
+++ b/1_Descriptive_Statistics_Museum.ipynb
@@ -93,16 +93,106 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 4,
    "id": "09daec01-9927-45c7-a6d4-9b9d0340ee02",
    "metadata": {},
    "outputs": [],
    "source": [
-    "companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
+    "companies = {'musee' : ['1', '2', '3', '4'], # '101' dropped from the 'musee' list\n",
     "            'sport': ['5', '6', '7', '8', '9'],\n",
     "            'musique' : ['10', '11', '12', '13', '14']}"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d9ccb033-3c7a-4647-ae1a-3a439dec2ea1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File path :  projet-bdc2324-team1/0_Input/Company_1/customerplus_cleaned.csv\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "File path :  projet-bdc2324-team1/0_Input/Company_1/campaigns_information.csv\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build the stacked KPI tables for every company listed in companies['musee'].\n",
+    "# NOTE(review): the *_sport output names are kept unchanged in case later cells reference them.\n",
+    "\n",
+    "# company identifiers to process; each one is also used as the customer_id prefix\n",
+    "nb_compagnie = companies['musee']\n",
+    "\n",
+    "# per-company tables are collected here and concatenated once after the loop\n",
+    "_company_frames = {'customer': [], 'campaigns_brut': [], 'campaigns_kpi': [], 'products': [], 'tickets': []}\n",
+    "\n",
+    "for directory_path in nb_compagnie:\n",
+    "    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
+    "    df_campaigns_brut = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
+    "    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
+    "    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
+    "\n",
+    "    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut)\n",
+    "    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
+    "    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
+    "\n",
+    "    # tag each table with its company number (used to aggregate results per company)\n",
+    "    # NOTE: df_target_information is tagged here but is not stacked into any output below\n",
+    "    for _frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_target_information):\n",
+    "        _frame[\"number_company\"] = int(directory_path)\n",
+    "\n",
+    "    # prefix customer_id with the company identifier (presumably to keep ids distinct across companies)\n",
+    "    for _frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_products_purchased_reduced):\n",
+    "        _frame[\"customer_id\"] = directory_path + '_' + _frame['customer_id'].astype('str')\n",
+    "\n",
+    "    _company_frames['customer'].append(df_customerplus_clean)\n",
+    "    _company_frames['campaigns_kpi'].append(df_campaigns_kpi)\n",
+    "    _company_frames['campaigns_brut'].append(df_campaigns_brut)\n",
+    "    _company_frames['tickets'].append(df_tickets_kpi)\n",
+    "    _company_frames['products'].append(df_products_purchased_reduced)\n",
+    "\n",
+    "def _stack(parts):\n",
+    "    # pd.concat raises on an empty list, so fall back to an empty DataFrame\n",
+    "    return pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()\n",
+    "\n",
+    "# one concatenation per output table instead of growing each table inside the loop\n",
+    "customer_sport = _stack(_company_frames['customer'])\n",
+    "campaigns_sport_kpi = _stack(_company_frames['campaigns_kpi'])\n",
+    "campaigns_sport_brut = _stack(_company_frames['campaigns_brut'])\n",
+    "tickets_sport = _stack(_company_frames['tickets'])\n",
+    "products_sport = _stack(_company_frames['products'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "105862bd-5d66-45ed-be71-ec6e1e103963",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a",
@@ -1247,115 +1337,10 @@
     "# 1 - Comportement d'achat"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "id": "8917cc1b-4728-460c-8432-a633de7f039b",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "File path :  projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "File path :  projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "File path :  projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "File path :  projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
-      "<string>:13: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "File path :  projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "File path :  projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced_1.csv\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
-     ]
-    }
-   ],
-   "source": [
-    "for company_number in ['1', '2', '3', '4', '101'] :\n",
-    "    nom_dataframe = 'df'+ company_number +'_tickets'\n",
-    "    globals()[nom_dataframe] = display_databases(company_number, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
-    "\n",
-    "    if company_number == \"101\" :\n",
-    "        df101_tickets_1 = display_databases(company_number, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
-    "\n",
-    "    "
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "3479960c-0d23-45f1-8fff-d87395205731",
-   "metadata": {
-    "jp-MarkdownHeadingCollapsed": true
-   },
+   "metadata": {},
    "source": [
     "## Outlier"
    ]