diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb
index a3018ba..a8dfc0f 100644
--- a/0_Cleaning_and_merge.ipynb
+++ b/0_Cleaning_and_merge.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 1,
"id": "15103481-8d74-404c-aa09-7601fe7730da",
"metadata": {},
"outputs": [],
@@ -119,10 +119,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2240/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(file_in)\n"
+ ]
+ }
+ ],
"source": [
"# loop to create dataframes from liste\n",
"files_path = liste_database\n",
@@ -149,7 +158,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
"metadata": {},
"outputs": [],
@@ -190,11 +199,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "def preprocessing_customerplus(customerplus = None):\n",
+ "    '''Return a cleaned copy of the customers table; the input frame is left untouched.'''\n",
+ "    unused_columns = ['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date']\n",
+ "    customers = customerplus.copy()\n",
+ "\n",
+ "    # cleaning_date comes from another cell; its return value is not used, so it presumably converts the column in place (TODO confirm)\n",
+ "    for date_column in ('first_buying_date', 'last_visiting_date'):\n",
+ "        cleaning_date(customers, date_column)\n",
+ "\n",
+ "    # NOTE(review): 'last_visiting_date' is parsed above and then removed here -- confirm that is intended\n",
+ "    customers = customers.drop(columns = unused_columns)\n",
+ "    customers = customers.rename(columns = {'id' : 'customer_id'})\n",
+ "    return customers\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "03329e32-00a5-42c8-9470-75f7b6216ccd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_customerplus_clean = preprocessing_customerplus(customerplus = df1_customersplus)"
+ ]
},
{
"cell_type": "markdown",
@@ -206,7 +239,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"id": "b95464b1-26bc-4aac-84b4-45da83b92251",
"metadata": {},
"outputs": [],
@@ -222,8 +255,8 @@
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
"\n",
" # Base des types de billets\n",
- " # type_ofs = type_ofs[['id', 'name', 'children']]\n",
- " # type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n",
+ " type_ofs = type_ofs[['id', 'name', 'children']]\n",
+ " type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n",
"\n",
" # Base des achats\n",
" # Nettoyage de la date d'achat\n",
@@ -236,9 +269,9 @@
" ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
" ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
" \n",
- " # # Fusion avec type de tickets\n",
- " # ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
- " # ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
+ " # Fusion avec type de tickets\n",
+ " ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
+ " ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
" \n",
" # Fusion avec achats\n",
" ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
@@ -249,22 +282,501 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2240/1591303091.py:5: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
+ "/tmp/ipykernel_2240/1591303091.py:9: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
+ "/tmp/ipykernel_2240/1591303091.py:13: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n"
+ ]
+ }
+ ],
"source": [
"df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ticket_id | \n",
+ " product_id | \n",
+ " is_from_subscription | \n",
+ " supplier_name | \n",
+ " type_of_ticket_name | \n",
+ " children | \n",
+ " purchase_date | \n",
+ " customer_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 13070859 | \n",
+ " 225251 | \n",
+ " False | \n",
+ " vente en ligne | \n",
+ " Atelier | \n",
+ " pricing_formula | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 13070860 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " vente en ligne | \n",
+ " Atelier | \n",
+ " pricing_formula | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13070861 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " vente en ligne | \n",
+ " Atelier | \n",
+ " pricing_formula | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13070862 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " vente en ligne | \n",
+ " Atelier | \n",
+ " pricing_formula | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 13070863 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " vente en ligne | \n",
+ " Atelier | \n",
+ " pricing_formula | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1826667 | \n",
+ " 18643847 | \n",
+ " 350454 | \n",
+ " False | \n",
+ " vad | \n",
+ " Billet en nombre | \n",
+ " pricing_formula | \n",
+ " 2022-08-02 08:59:17+00:00 | \n",
+ " 41 | \n",
+ "
\n",
+ " \n",
+ " 1826668 | \n",
+ " 19853111 | \n",
+ " 383564 | \n",
+ " False | \n",
+ " vad | \n",
+ " Billet en nombre | \n",
+ " pricing_formula | \n",
+ " 2022-11-04 14:25:42+00:00 | \n",
+ " 62763 | \n",
+ "
\n",
+ " \n",
+ " 1826669 | \n",
+ " 19860514 | \n",
+ " 383751 | \n",
+ " False | \n",
+ " vad | \n",
+ " Billet en nombre | \n",
+ " pricing_formula | \n",
+ " 2022-11-18 10:47:26+00:00 | \n",
+ " 1195566 | \n",
+ "
\n",
+ " \n",
+ " 1826670 | \n",
+ " 19860515 | \n",
+ " 383751 | \n",
+ " False | \n",
+ " vad | \n",
+ " Billet en nombre | \n",
+ " pricing_formula | \n",
+ " 2022-11-18 10:47:26+00:00 | \n",
+ " 1195566 | \n",
+ "
\n",
+ " \n",
+ " 1826671 | \n",
+ " 19860516 | \n",
+ " 383751 | \n",
+ " False | \n",
+ " vad | \n",
+ " Billet en nombre | \n",
+ " pricing_formula | \n",
+ " 2022-11-18 10:47:26+00:00 | \n",
+ " 1195566 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1826672 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ticket_id product_id is_from_subscription supplier_name \\\n",
+ "0 13070859 225251 False vente en ligne \n",
+ "1 13070860 224914 False vente en ligne \n",
+ "2 13070861 224914 False vente en ligne \n",
+ "3 13070862 224914 False vente en ligne \n",
+ "4 13070863 224914 False vente en ligne \n",
+ "... ... ... ... ... \n",
+ "1826667 18643847 350454 False vad \n",
+ "1826668 19853111 383564 False vad \n",
+ "1826669 19860514 383751 False vad \n",
+ "1826670 19860515 383751 False vad \n",
+ "1826671 19860516 383751 False vad \n",
+ "\n",
+ " type_of_ticket_name children purchase_date \\\n",
+ "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
+ "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
+ "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
+ "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
+ "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n",
+ "... ... ... ... \n",
+ "1826667 Billet en nombre pricing_formula 2022-08-02 08:59:17+00:00 \n",
+ "1826668 Billet en nombre pricing_formula 2022-11-04 14:25:42+00:00 \n",
+ "1826669 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n",
+ "1826670 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n",
+ "1826671 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n",
+ "\n",
+ " customer_id \n",
+ "0 48187 \n",
+ "1 48187 \n",
+ "2 48187 \n",
+ "3 48187 \n",
+ "4 48187 \n",
+ "... ... \n",
+ "1826667 41 \n",
+ "1826668 62763 \n",
+ "1826669 1195566 \n",
+ "1826670 1195566 \n",
+ "1826671 1195566 \n",
+ "\n",
+ "[1826672 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_ticket_information"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "37499eae-1a7f-4dce-83b0-ff942ccf7a9d",
+ "metadata": {},
+ "source": [
+ "### KPI tickets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "043303fe-e90f-4689-a2a9-5d690555a045",
+ "metadata": {},
"outputs": [],
"source": [
- "df1_ticket_information"
+ "def tickets_kpi_function(tickets_information = None):\n",
+ "    '''Aggregate ticket lines into one KPI row per (product_id, customer_id) pair.\n",
+ "\n",
+ "    Reads the columns product_id, customer_id, ticket_id, supplier_name and purchase_date.\n",
+ "    Returns a new DataFrame with nb_tickets (non-null ticket_id count), nb_suppliers\n",
+ "    (distinct supplier_name count), purchase_date_max, purchase_date_min and\n",
+ "    time_between_purchase (max minus min); the input frame is not modified.\n",
+ "    '''\n",
+ "    # Named aggregation takes min and max of purchase_date directly, so no copy of the\n",
+ "    # input and no duplicated helper column are needed.\n",
+ "    tickets_kpi = (tickets_information\n",
+ "                   .groupby(['product_id', 'customer_id'])\n",
+ "                   .agg(nb_tickets = ('ticket_id', 'count'),\n",
+ "                        nb_suppliers = ('supplier_name', 'nunique'),\n",
+ "                        purchase_date_max = ('purchase_date', 'max'),\n",
+ "                        purchase_date_min = ('purchase_date', 'min'))\n",
+ "                   .reset_index())\n",
+ "    tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
+ "\n",
+ "    return tickets_kpi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "5882234a-1ed5-4269-87a6-0d75613476e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_ticket_information)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " product_id | \n",
+ " customer_id | \n",
+ " nb_tickets | \n",
+ " nb_suppliers | \n",
+ " purchase_date_max | \n",
+ " purchase_date_min | \n",
+ " time_between_purchase | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 107310 | \n",
+ " 2805 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2019-06-05 14:37:13+00:00 | \n",
+ " 2019-06-05 14:18:38+00:00 | \n",
+ " 0 days 00:18:35 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 110089 | \n",
+ " 54355 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2017-02-17 13:32:51+00:00 | \n",
+ " 2017-02-17 13:32:51+00:00 | \n",
+ " 0 days 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 110089 | \n",
+ " 54356 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2017-03-02 14:36:16+00:00 | \n",
+ " 2017-03-02 14:36:16+00:00 | \n",
+ " 0 days 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 110089 | \n",
+ " 54357 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2017-03-06 15:16:41+00:00 | \n",
+ " 2017-03-06 15:16:41+00:00 | \n",
+ " 0 days 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 110089 | \n",
+ " 54358 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2017-03-13 16:07:27+00:00 | \n",
+ " 2017-03-13 16:07:27+00:00 | \n",
+ " 0 days 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 128360 | \n",
+ " 406026 | \n",
+ " 1 | \n",
+ " 11 | \n",
+ " 2 | \n",
+ " 2023-11-08 12:53:31+00:00 | \n",
+ " 2023-11-08 09:30:28+00:00 | \n",
+ " 0 days 03:23:03 | \n",
+ "
\n",
+ " \n",
+ " 128361 | \n",
+ " 406027 | \n",
+ " 1 | \n",
+ " 31 | \n",
+ " 2 | \n",
+ " 2023-11-08 15:59:11+00:00 | \n",
+ " 2023-11-08 09:15:36+00:00 | \n",
+ " 0 days 06:43:35 | \n",
+ "
\n",
+ " \n",
+ " 128362 | \n",
+ " 406028 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2023-11-08 14:56:08+00:00 | \n",
+ " 2023-11-08 11:18:37+00:00 | \n",
+ " 0 days 03:37:31 | \n",
+ "
\n",
+ " \n",
+ " 128363 | \n",
+ " 406029 | \n",
+ " 1256130 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2023-11-08 10:35:43+00:00 | \n",
+ " 2023-11-08 10:35:43+00:00 | \n",
+ " 0 days 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 128364 | \n",
+ " 406029 | \n",
+ " 1256133 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2023-11-08 16:51:19+00:00 | \n",
+ " 2023-11-08 16:51:19+00:00 | \n",
+ " 0 days 00:00:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
128365 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " product_id customer_id nb_tickets nb_suppliers \\\n",
+ "0 107310 2805 4 2 \n",
+ "1 110089 54355 1 1 \n",
+ "2 110089 54356 1 1 \n",
+ "3 110089 54357 1 1 \n",
+ "4 110089 54358 1 1 \n",
+ "... ... ... ... ... \n",
+ "128360 406026 1 11 2 \n",
+ "128361 406027 1 31 2 \n",
+ "128362 406028 1 2 1 \n",
+ "128363 406029 1256130 2 1 \n",
+ "128364 406029 1256133 3 1 \n",
+ "\n",
+ " purchase_date_max purchase_date_min \\\n",
+ "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 \n",
+ "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 \n",
+ "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 \n",
+ "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 \n",
+ "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 \n",
+ "... ... ... \n",
+ "128360 2023-11-08 12:53:31+00:00 2023-11-08 09:30:28+00:00 \n",
+ "128361 2023-11-08 15:59:11+00:00 2023-11-08 09:15:36+00:00 \n",
+ "128362 2023-11-08 14:56:08+00:00 2023-11-08 11:18:37+00:00 \n",
+ "128363 2023-11-08 10:35:43+00:00 2023-11-08 10:35:43+00:00 \n",
+ "128364 2023-11-08 16:51:19+00:00 2023-11-08 16:51:19+00:00 \n",
+ "\n",
+ " time_between_purchase \n",
+ "0 0 days 00:18:35 \n",
+ "1 0 days 00:00:00 \n",
+ "2 0 days 00:00:00 \n",
+ "3 0 days 00:00:00 \n",
+ "4 0 days 00:00:00 \n",
+ "... ... \n",
+ "128360 0 days 03:23:03 \n",
+ "128361 0 days 06:43:35 \n",
+ "128362 0 days 03:37:31 \n",
+ "128363 0 days 00:00:00 \n",
+ "128364 0 days 00:00:00 \n",
+ "\n",
+ "[128365 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_tickets_kpi"
]
},
{
@@ -277,7 +789,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
"metadata": {},
"outputs": [],
@@ -306,32 +818,413 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2240/3848597476.py:4: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
+ ]
+ }
+ ],
"source": [
"df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ "
\n",
+ " \n",
+ " target_name | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " consentement optin mediation specialisee | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " consentement optin jeune public | \n",
+ " 149979 | \n",
+ "
\n",
+ " \n",
+ " consentement optin b2c | \n",
+ " 108909 | \n",
+ "
\n",
+ " \n",
+ " Arenametrix_bascule tel vers sib | \n",
+ " 35216 | \n",
+ "
\n",
+ " \n",
+ " consentement optout b2c | \n",
+ " 34523 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " Automation_parrainage_newsletter_handicap_visuel | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " consentement optout mediation specialisee | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " Inscrits NL LSF formulaire | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " Market auto - contacts inactifs post-scénario | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " Inactifs - fin du scénario | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
283 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id\n",
+ "target_name \n",
+ "consentement optin mediation specialisee 150000\n",
+ "consentement optin jeune public 149979\n",
+ "consentement optin b2c 108909\n",
+ "Arenametrix_bascule tel vers sib 35216\n",
+ "consentement optout b2c 34523\n",
+ "... ...\n",
+ "Automation_parrainage_newsletter_handicap_visuel 1\n",
+ "consentement optout mediation specialisee 1\n",
+ "Inscrits NL LSF formulaire 1\n",
+ "Market auto - contacts inactifs post-scénario 1\n",
+ "Inactifs - fin du scénario 1\n",
+ "\n",
+ "[283 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
"metadata": {
"scrolled": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ "
\n",
+ " \n",
+ " target_name | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Arenametrix_bascule tel vers sib | \n",
+ " 35216 | \n",
+ "
\n",
+ " \n",
+ " Autres_interet_exposition | \n",
+ " 1021 | \n",
+ "
\n",
+ " \n",
+ " COM Inscrits NL générale (historique) | \n",
+ " 23005 | \n",
+ "
\n",
+ " \n",
+ " Contacts_prenomsdoubles | \n",
+ " 11643 | \n",
+ "
\n",
+ " \n",
+ " DDCP MD Procès du Siècle | \n",
+ " 1684 | \n",
+ "
\n",
+ " \n",
+ " DDCP Newsletter centres de loisirs | \n",
+ " 1032 | \n",
+ "
\n",
+ " \n",
+ " DDCP Newsletter enseignants | \n",
+ " 4510 | \n",
+ "
\n",
+ " \n",
+ " DDCP Newsletter jeune public | \n",
+ " 3862 | \n",
+ "
\n",
+ " \n",
+ " DDCP Newsletter relais champ social | \n",
+ " 2270 | \n",
+ "
\n",
+ " \n",
+ " DDCP PROMO Participants ateliers (adultes et enfants) | \n",
+ " 1954 | \n",
+ "
\n",
+ " \n",
+ " DDCP billets famille | \n",
+ " 3609 | \n",
+ "
\n",
+ " \n",
+ " DDCP promo MD pass musées dps oct 2018 | \n",
+ " 1785 | \n",
+ "
\n",
+ " \n",
+ " DDCP promo Plan B 2019 (concerts) | \n",
+ " 1948 | \n",
+ "
\n",
+ " \n",
+ " DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers) | \n",
+ " 1293 | \n",
+ "
\n",
+ " \n",
+ " DDCP rentrée culturelle 2023 | \n",
+ " 1757 | \n",
+ "
\n",
+ " \n",
+ " DDCP_marseille_jazz_2023 | \n",
+ " 1043 | \n",
+ "
\n",
+ " \n",
+ " DRE Festival Jean Rouch | \n",
+ " 1502 | \n",
+ "
\n",
+ " \n",
+ " DRE MucemLab | \n",
+ " 2302 | \n",
+ "
\n",
+ " \n",
+ " DRE chercheurs | \n",
+ " 1557 | \n",
+ "
\n",
+ " \n",
+ " DRE institutionnels | \n",
+ " 2229 | \n",
+ "
\n",
+ " \n",
+ " FORMATION _ acheteurs optin last year | \n",
+ " 10485 | \n",
+ "
\n",
+ " \n",
+ " Inscrits NL générale (export_291019 + operation_videomaton) | \n",
+ " 14086 | \n",
+ "
\n",
+ " \n",
+ " Inscrits NL générale site web | \n",
+ " 3732 | \n",
+ "
\n",
+ " \n",
+ " Inscrits NL jeune public site web | \n",
+ " 1249 | \n",
+ "
\n",
+ " \n",
+ " Votre première liste | \n",
+ " 3715 | \n",
+ "
\n",
+ " \n",
+ " consentement optin b2b | \n",
+ " 12735 | \n",
+ "
\n",
+ " \n",
+ " consentement optin b2c | \n",
+ " 108909 | \n",
+ "
\n",
+ " \n",
+ " consentement optin dre | \n",
+ " 4527 | \n",
+ "
\n",
+ " \n",
+ " consentement optin jeune public | \n",
+ " 149979 | \n",
+ "
\n",
+ " \n",
+ " consentement optin mediation specialisee | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " consentement optin newsletter generale | \n",
+ " 22095 | \n",
+ "
\n",
+ " \n",
+ " consentement optin scolaires | \n",
+ " 4849 | \n",
+ "
\n",
+ " \n",
+ " consentement optout b2b | \n",
+ " 14219 | \n",
+ "
\n",
+ " \n",
+ " consentement optout b2c | \n",
+ " 34523 | \n",
+ "
\n",
+ " \n",
+ " consentement optout dre | \n",
+ " 14328 | \n",
+ "
\n",
+ " \n",
+ " consentement optout newsletter generale | \n",
+ " 18855 | \n",
+ "
\n",
+ " \n",
+ " consentement optout scolaires | \n",
+ " 15744 | \n",
+ "
\n",
+ " \n",
+ " ddcp_md_scene_ouverte_au_talent | \n",
+ " 1577 | \n",
+ "
\n",
+ " \n",
+ " ddcp_promo_MD_billet_musée_oct_2019_agarder2 | \n",
+ " 5482 | \n",
+ "
\n",
+ " \n",
+ " ddcp_promo_md_musée_dps 011019 | \n",
+ " 6010 | \n",
+ "
\n",
+ " \n",
+ " ddcp_promo_visiteurs occasionnels_musee_8mois | \n",
+ " 6640 | \n",
+ "
\n",
+ " \n",
+ " ddcp_visiteurs dps 010622 | \n",
+ " 12355 | \n",
+ "
\n",
+ " \n",
+ " festival_jean_rouch | \n",
+ " 1502 | \n",
+ "
\n",
+ " \n",
+ " rappel po barvalo | \n",
+ " 1248 | \n",
+ "
\n",
+ " \n",
+ " structures_etiquette champ social | \n",
+ " 1488 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id\n",
+ "target_name \n",
+ "Arenametrix_bascule tel vers sib 35216\n",
+ "Autres_interet_exposition 1021\n",
+ "COM Inscrits NL générale (historique) 23005\n",
+ "Contacts_prenomsdoubles 11643\n",
+ "DDCP MD Procès du Siècle 1684\n",
+ "DDCP Newsletter centres de loisirs 1032\n",
+ "DDCP Newsletter enseignants 4510\n",
+ "DDCP Newsletter jeune public 3862\n",
+ "DDCP Newsletter relais champ social 2270\n",
+ "DDCP PROMO Participants ateliers (adultes et en... 1954\n",
+ "DDCP billets famille 3609\n",
+ "DDCP promo MD pass musées dps oct 2018 1785\n",
+ "DDCP promo Plan B 2019 (concerts) 1948\n",
+ "DDCP promo spectateurs prog 21-22 (spectacles, ... 1293\n",
+ "DDCP rentrée culturelle 2023 1757\n",
+ "DDCP_marseille_jazz_2023 1043\n",
+ "DRE Festival Jean Rouch 1502\n",
+ "DRE MucemLab 2302\n",
+ "DRE chercheurs 1557\n",
+ "DRE institutionnels 2229\n",
+ "FORMATION _ acheteurs optin last year 10485\n",
+ "Inscrits NL générale (export_291019 + operation... 14086\n",
+ "Inscrits NL générale site web 3732\n",
+ "Inscrits NL jeune public site web 1249\n",
+ "Votre première liste 3715\n",
+ "consentement optin b2b 12735\n",
+ "consentement optin b2c 108909\n",
+ "consentement optin dre 4527\n",
+ "consentement optin jeune public 149979\n",
+ "consentement optin mediation specialisee 150000\n",
+ "consentement optin newsletter generale 22095\n",
+ "consentement optin scolaires 4849\n",
+ "consentement optout b2b 14219\n",
+ "consentement optout b2c 34523\n",
+ "consentement optout dre 14328\n",
+ "consentement optout newsletter generale 18855\n",
+ "consentement optout scolaires 15744\n",
+ "ddcp_md_scene_ouverte_au_talent 1577\n",
+ "ddcp_promo_MD_billet_musée_oct_2019_agarder2 5482\n",
+ "ddcp_promo_md_musée_dps 011019 6010\n",
+ "ddcp_promo_visiteurs occasionnels_musee_8mois 6640\n",
+ "ddcp_visiteurs dps 010622 12355\n",
+ "festival_jean_rouch 1502\n",
+ "rappel po barvalo 1248\n",
+ "structures_etiquette champ social 1488"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n",
"df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000]"
@@ -347,7 +1240,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
"metadata": {},
"outputs": [],
@@ -372,32 +1265,278 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
+ "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
+ "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
+ ]
+ }
+ ],
"source": [
"df1_campaigns_information = preprocessing_campaigns_area(campaign_stats = df1_campaign_stats, campaigns = df1_campaigns)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 112597 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:01:09+00:00 | \n",
+ " 2021-03-28 16:24:18+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 113666 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:01:09+00:00 | \n",
+ " 2021-03-28 16:21:02+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 280561 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:00:59+00:00 | \n",
+ " 2021-03-28 16:08:45+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 101007 | \n",
+ " 2021-03-28 18:11:06+00:00 | \n",
+ " 2021-03-28 16:00:59+00:00 | \n",
+ " 2021-03-28 16:09:47+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 103972 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:01:06+00:00 | \n",
+ " 2021-03-28 16:05:03+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 6214803 | \n",
+ " 8302994 | \n",
+ " 266155 | \n",
+ " 2023-10-23 09:43:25+00:00 | \n",
+ " 2023-10-23 09:32:33+00:00 | \n",
+ " 2023-10-23 09:32:34+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " 6214804 | \n",
+ " 8303307 | \n",
+ " 21355 | \n",
+ " 2023-10-23 09:44:02+00:00 | \n",
+ " 2023-10-23 09:32:49+00:00 | \n",
+ " 2023-10-23 09:32:49+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " 6214805 | \n",
+ " 8304346 | \n",
+ " 21849 | \n",
+ " 2023-10-23 09:45:52+00:00 | \n",
+ " 2023-10-23 09:33:28+00:00 | \n",
+ " 2023-10-23 09:33:29+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " 6214806 | \n",
+ " 8302037 | \n",
+ " 667789 | \n",
+ " 2023-10-23 09:47:32+00:00 | \n",
+ " 2023-10-23 09:31:53+00:00 | \n",
+ " 2023-10-23 09:31:54+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " 6214807 | \n",
+ " 8304939 | \n",
+ " 294154 | \n",
+ " NaT | \n",
+ " 2023-10-23 09:33:54+00:00 | \n",
+ " 2023-10-23 09:33:55+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6214808 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id opened_at \\\n",
+ "0 19793 112597 NaT \n",
+ "1 14211 113666 NaT \n",
+ "2 13150 280561 NaT \n",
+ "3 7073 101007 2021-03-28 18:11:06+00:00 \n",
+ "4 5175 103972 NaT \n",
+ "... ... ... ... \n",
+ "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n",
+ "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n",
+ "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n",
+ "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n",
+ "6214807 8304939 294154 NaT \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n",
+ "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n",
+ "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n",
+ "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n",
+ "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n",
+ "... ... ... \n",
+ "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n",
+ "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n",
+ "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n",
+ "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n",
+ "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "... ... ... \n",
+ "6214803 dre_nov_2023 1318 \n",
+ "6214804 dre_nov_2023 1318 \n",
+ "6214805 dre_nov_2023 1318 \n",
+ "6214806 dre_nov_2023 1318 \n",
+ "6214807 dre_nov_2023 1318 \n",
+ "\n",
+ " campaign_sent_at \n",
+ "0 2021-03-27 23:00:00+00:00 \n",
+ "1 2021-03-27 23:00:00+00:00 \n",
+ "2 2021-03-27 23:00:00+00:00 \n",
+ "3 2021-03-27 23:00:00+00:00 \n",
+ "4 2021-03-27 23:00:00+00:00 \n",
+ "... ... \n",
+ "6214803 2023-10-23 09:31:17+00:00 \n",
+ "6214804 2023-10-23 09:31:17+00:00 \n",
+ "6214805 2023-10-23 09:31:17+00:00 \n",
+ "6214806 2023-10-23 09:31:17+00:00 \n",
+ "6214807 2023-10-23 09:31:17+00:00 \n",
+ "\n",
+ "[6214808 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df1_campaigns_information"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
"metadata": {},
"outputs": [],
"source": [
- "def campaigns_kpi(campaigns_information = None):\n",
+ "def campaigns_kpi_function(campaigns_information = None):\n",
" # Nombre de campagnes de mails\n",
" nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
" nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n",
@@ -426,20 +1565,177 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"id": "24537647-bc29-4777-9848-ac4120a4aa60",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2240/3700263836.py:11: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n"
+ ]
+ }
+ ],
"source": [
- "df1_campaigns_kpi = campaigns_kpi(campaigns_information = df1_campaigns_information) "
+ "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) "
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 222 | \n",
+ " 124.0 | \n",
+ " 1 days 00:28:30.169354838 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 7 | \n",
+ " 7.0 | \n",
+ " 1 days 04:31:01.428571428 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 20 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 130467 | \n",
+ " 1256097 | \n",
+ " 1 | \n",
+ " 1.0 | \n",
+ " 0 days 02:11:15 | \n",
+ "
\n",
+ " \n",
+ " 130468 | \n",
+ " 1256098 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 130469 | \n",
+ " 1256099 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 130470 | \n",
+ " 1256100 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " 130471 | \n",
+ " 1256101 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
130472 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id nb_campaigns nb_campaigns_opened \\\n",
+ "0 2 4 0.0 \n",
+ "1 3 222 124.0 \n",
+ "2 4 7 7.0 \n",
+ "3 5 4 0.0 \n",
+ "4 6 20 0.0 \n",
+ "... ... ... ... \n",
+ "130467 1256097 1 1.0 \n",
+ "130468 1256098 1 0.0 \n",
+ "130469 1256099 1 0.0 \n",
+ "130470 1256100 1 0.0 \n",
+ "130471 1256101 1 0.0 \n",
+ "\n",
+ " time_to_open \n",
+ "0 NaT \n",
+ "1 1 days 00:28:30.169354838 \n",
+ "2 1 days 04:31:01.428571428 \n",
+ "3 NaT \n",
+ "4 NaT \n",
+ "... ... \n",
+ "130467 0 days 02:11:15 \n",
+ "130468 NaT \n",
+ "130469 NaT \n",
+ "130470 NaT \n",
+ "130471 NaT \n",
+ "\n",
+ "[130472 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df1_campaigns_kpi"
]
@@ -462,7 +1758,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 24,
"id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
"metadata": {},
"outputs": [],
@@ -473,7 +1769,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 25,
"id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
"metadata": {},
"outputs": [],
@@ -544,7 +1840,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 26,
"id": "350b09b9-451f-4d47-81fe-f34b892db027",
"metadata": {},
"outputs": [],
@@ -632,7 +1928,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 27,
"id": "0fccc8ef-e575-4857-a401-94a7274394df",
"metadata": {},
"outputs": [
@@ -785,7 +2081,7 @@
"4 indiv entrées tp "
]
},
- "execution_count": 32,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -797,7 +2093,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 28,
"id": "779d8aaf-6668-4f66-8852-847304407ea3",
"metadata": {},
"outputs": [
@@ -967,7 +2263,7 @@
"4 spectacle vivant mucem "
]
},
- "execution_count": 33,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -979,7 +2275,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 29,
"id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
"metadata": {},
"outputs": [
@@ -1078,7 +2374,7 @@
"4 37 383 269 1"
]
},
- "execution_count": 34,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -1098,7 +2394,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 30,
"id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
"metadata": {},
"outputs": [],
@@ -1126,7 +2422,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 31,
"id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
"metadata": {},
"outputs": [
@@ -1330,7 +2626,7 @@
"4 1 8.5 False non défini mucem "
]
},
- "execution_count": 36,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -1340,10 +2636,35 @@
"products_global.head()"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7",
+ "metadata": {},
+ "source": [
+ "# Fusion des bases locales"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Product-side merge: inner join of ticket KPIs with the product table (product_id = id_products)\n",
+ "df1_product_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n",
+ "\n",
+ "# Customer-side merge: left join of cleaned customers with campaign KPIs on customer_id\n",
+ "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
+ "\n",
+ "# Combine both: left join of customers with their purchased products on customer_id\n",
+ "df1_customer_product = pd.merge(df1_customer, df1_product_purchased, on = 'customer_id', how = 'left')"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
- "id": "117d172a-2195-4060-9245-96c6f637ebbd",
+ "id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {},
"outputs": [],
"source": []