From 11e2e8658329c2354b20219e8c1b597dcde47f9e Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Sat, 9 Mar 2024 14:50:58 +0000 Subject: [PATCH] Ajout et sauvegarde --- 1_Descriptive_Statistics_Museum.ipynb | 201 ++++++++++++-------------- 1 file changed, 93 insertions(+), 108 deletions(-) diff --git a/1_Descriptive_Statistics_Museum.ipynb b/1_Descriptive_Statistics_Museum.ipynb index 1023f39..859c41b 100644 --- a/1_Descriptive_Statistics_Museum.ipynb +++ b/1_Descriptive_Statistics_Museum.ipynb @@ -93,16 +93,106 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 4, "id": "09daec01-9927-45c7-a6d4-9b9d0340ee02", "metadata": {}, "outputs": [], "source": [ - "companies = {'musee' : ['1', '2', '3', '4', '101'],\n", + "companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n", " 'sport': ['5', '6', '7', '8', '9'],\n", " 'musique' : ['10', '11', '12', '13', '14']}" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9ccb033-3c7a-4647-ae1a-3a439dec2ea1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_1/customerplus_cleaned.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_1/campaigns_information.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. 
Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" + ] + } + ], + "source": [ + "# création des bases contenant les KPI pour les compagnies de musée\n", + "\n", + "# liste des compagnies de musée\n", + "nb_compagnie= companies['musee']\n", + "\n", + "customer_sport = pd.DataFrame()\n", + "campaigns_sport_brut = pd.DataFrame()\n", + "campaigns_sport_kpi = pd.DataFrame()\n", + "products_sport = pd.DataFrame()\n", + "tickets_sport = pd.DataFrame()\n", + "\n", + "# début de la boucle permettant de générer des datasets agrégés pour les compagnies de musée\n", + "for directory_path in nb_compagnie:\n", + " df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n", + " df_campaigns_brut = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n", + " df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n", + " df_target_information = display_databases(directory_path, file_name = \"target_information\")\n", + " \n", + " df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut) \n", + " df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n", + " df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n", + "\n", + " \n", + "# création de la colonne number_company, qui permettra d'agréger les résultats\n", + " df_tickets_kpi[\"number_company\"]=int(directory_path)\n", + " df_campaigns_brut[\"number_company\"]=int(directory_path)\n", + " df_campaigns_kpi[\"number_company\"]=int(directory_path)\n", + " df_customerplus_clean[\"number_company\"]=int(directory_path)\n", + " df_target_information[\"number_company\"]=int(directory_path)\n", + "\n", + "# Traitement des index\n", + " 
df_tickets_kpi[\"customer_id\"]= directory_path + '_' + df_tickets_kpi['customer_id'].astype('str')\n", + " df_campaigns_brut[\"customer_id\"]= directory_path + '_' + df_campaigns_brut['customer_id'].astype('str')\n", + " df_campaigns_kpi[\"customer_id\"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str') \n", + " df_customerplus_clean[\"customer_id\"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str') \n", + " df_products_purchased_reduced[\"customer_id\"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str') \n", + "\n", + "# Concaténation\n", + " customer_sport = pd.concat([customer_sport, df_customerplus_clean], ignore_index=True)\n", + " campaigns_sport_kpi = pd.concat([campaigns_sport_kpi, df_campaigns_kpi], ignore_index=True)\n", + " campaigns_sport_brut = pd.concat([campaigns_sport_brut, df_campaigns_brut], ignore_index=True) \n", + " tickets_sport = pd.concat([tickets_sport, df_tickets_kpi], ignore_index=True)\n", + " products_sport = pd.concat([products_sport, df_products_purchased_reduced], ignore_index=True)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "105862bd-5d66-45ed-be71-ec6e1e103963", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a", @@ -1247,115 +1337,10 @@ "# 1 - Comportement d'achat" ] }, - { - "cell_type": "code", - "execution_count": 34, - "id": "8917cc1b-4728-460c-8432-a633de7f039b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. 
Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", - ":13: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. 
Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced_1.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n" - ] - } - ], - "source": [ - "for company_number in ['1', '2', '3', '4', '101'] :\n", - " nom_dataframe = 'df'+ company_number +'_tickets'\n", - " globals()[nom_dataframe] = display_databases(company_number, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n", - "\n", - " if company_number == \"101\" :\n", - " df101_tickets_1 = display_databases(company_number, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n", - "\n", - " " - ] - }, { "cell_type": "markdown", "id": "3479960c-0d23-45f1-8fff-d87395205731", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "## Outlier" ]