From 5d4dde93f30f8a442f61248cae38b1542387690d Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Tue, 6 Feb 2024 21:09:08 +0000 Subject: [PATCH] Modification de la base df1_product_purchased --- 0_Cleaning_and_merge.ipynb | 48 ++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index b2c2018..9f3f20b 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -79,7 +79,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_492/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } @@ -242,17 +242,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/1591303091.py:5: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/1591303091.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "/tmp/ipykernel_3658/1591303091.py:9: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/1591303091.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "/tmp/ipykernel_3658/1591303091.py:13: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/1591303091.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -596,7 +596,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/3848597476.py:4: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/3848597476.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -810,19 +810,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -1010,7 +1010,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/3700263836.py:11: SettingWithCopyWarning: \n", + "/tmp/ipykernel_492/3700263836.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -2009,6 +2009,17 @@ "products_global.head()" ] }, + { + "cell_type": "code", + "execution_count": 32, + "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", + "metadata": {}, + "outputs": [], + "source": [ + "# Fusion liée au product\n", + "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')" + ] + }, { "cell_type": "markdown", "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7", @@ -2019,28 +2030,25 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", "metadata": {}, "outputs": [], "source": [ - "# Fusion liée au product\n", - "df1_products_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", - "\n", - "# Fusion liée au customer\n", - "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", - "\n", - "# Fusion product et customer\n", - "df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')" + "# Fusion avec KPI liés au customer\n", + "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "1e42a790-b215-4107-a969-85005da06ebd", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Fusion avec KPI liés au comportement d'achat\n", + "# df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')" + ] } ], "metadata": {