From a3016ce78ec3a27fb56d95d81ab2f40952083d1f Mon Sep 17 00:00:00 2001 From: frodrigue-ensae Date: Sat, 2 Mar 2024 11:16:24 +0000 Subject: [PATCH] stat_des --- Spectacle/Stat_desc.ipynb | 303 +++++++++++++++++++++++++++++++++++++- 1 file changed, 295 insertions(+), 8 deletions(-) diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb index 42b5ca5..98db314 100644 --- a/Spectacle/Stat_desc.ipynb +++ b/Spectacle/Stat_desc.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e", "metadata": {}, "outputs": [], @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "9c1737a2-bad8-4266-8dec-452085d8cfe7", "metadata": {}, "outputs": [ @@ -42,7 +42,7 @@ " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "id": "40b705eb-fd18-436b-b150-61611a3c6a84", "metadata": {}, "outputs": [], @@ -89,13 +89,300 @@ " return df \n" ] }, + { + "cell_type": "markdown", + "id": "e22eb500-80da-4dd9-8b20-9e4deec64831", + "metadata": {}, + "source": [ + "nb_compagnie=['10','11','12','13','14']\n", + "for directory_path in nb_compagnie:\n", + " df_customerplus_clean = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n", + " df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n", + " df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n", + " df_customerplus_clean[\"Number_compagnie\"]=int(directory_path)\n", + " df_campaigns_information[\"Number_compagnie\"]=int(directory_path)\n", + " df_products_purchased_reduced[\"Number_compagnie\"]=int(directory_path)\n", + "\n", + " if nb_compagnie.index(directory_path)>=1:\n", + " customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n", + " campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_information],axis=0)\n", + " products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_products_purchased_reduced],axis=0)\n", + " else:\n", + " customerplus_clean_spectacle=df_customerplus_clean\n", + " campaigns_information_spectacle=df_campaigns_information\n", + " products_purchased_reduced_spectacle=df_products_purchased_reduced\n", + " " + ] + }, { "cell_type": "code", - "execution_count": null, - "id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd", + "execution_count": 6, + "id": "1c4a07ec-cf8e-420e-88f0-5eb4b83e2bc1", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "directory_path='10'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4249cdcd-ef59-4dd6-a345-4218bb90e526", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1282/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", + " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1282/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", + " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1282/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", + " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n" + ] + } + ], + "source": [ + " # Import customerplus\n", + "df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n", + "df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n", + "df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":27: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" + ] + } + ], + "source": [ + " # Creation des KPI\n", + " # KPI sur les campagnes publicitaires\n", + "df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n", + "\n", + " # KPI sur le comportement d'achat\n", + "df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n", + "\n", + " # KPI sur les données socio-démographiques\n", + "df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n", + "\n", + "#creation de la colonne Number compagnie\n", + "df_tickets_kpi[\"Number_compagnie\"]=int(directory_path)\n", + "df_campaigns_kpi[\"Number_compagnie\"]=int(directory_path)\n", + "df_customerplus_clean[\"Number_compagnie\"]=int(directory_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "838722c3-4c78-4ffa-a6b6-01ac60f4bdbd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idnb_campaignsnb_campaigns_openedtime_to_openNumber_compagnie
02940.0NaT10
13730.0NaT10
23941.00 days 05:16:3810
34141.00 days 01:12:2910
44440.0NaT10
..................
5713882794010.0NaT10
5713982794110.0NaT10
5714082794210.0NaT10
5714182794310.0NaT10
5714282794410.0NaT10
\n", + "

57143 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n", + "0 29 4 0.0 NaT \n", + "1 37 3 0.0 NaT \n", + "2 39 4 1.0 0 days 05:16:38 \n", + "3 41 4 1.0 0 days 01:12:29 \n", + "4 44 4 0.0 NaT \n", + "... ... ... ... ... \n", + "57138 827940 1 0.0 NaT \n", + "57139 827941 1 0.0 NaT \n", + "57140 827942 1 0.0 NaT \n", + "57141 827943 1 0.0 NaT \n", + "57142 827944 1 0.0 NaT \n", + "\n", + " Number_compagnie \n", + "0 10 \n", + "1 10 \n", + "2 10 \n", + "3 10 \n", + "4 10 \n", + "... ... \n", + "57138 10 \n", + "57139 10 \n", + "57140 10 \n", + "57141 10 \n", + "57142 10 \n", + "\n", + "[57143 rows x 5 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_campaigns_kpi" + ] } ], "metadata": {