diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb
index 80a3be3..2fb2c0d 100644
--- a/Spectacle/Stat_desc.ipynb
+++ b/Spectacle/Stat_desc.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 2,
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 3,
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
@@ -46,7 +46,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 4,
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
@@ -59,7 +59,7 @@
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
- "execution_count": 9,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -75,7 +75,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
@@ -89,7 +89,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 6,
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
@@ -109,7 +109,512 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 10,
+ "id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ " id customer_id target_name target_type_is_import \\\n",
+ "0 1165098 618562 Newsletter mensuelle False \n",
+ "1 1165100 618559 Newsletter mensuelle False \n",
+ "2 1165101 618561 Newsletter mensuelle False \n",
+ "3 1165102 618560 Newsletter mensuelle False \n",
+ "4 1165103 618558 Newsletter mensuelle False \n",
+ "... ... ... ... ... \n",
+ "69253 1698158 18580 Newsletter mensuelle False \n",
+ "69254 1698159 18569 Newsletter mensuelle False \n",
+ "69255 1698160 2962 Newsletter mensuelle False \n",
+ "69256 1698161 3825 Newsletter mensuelle False \n",
+ "69257 1698162 5731 Newsletter mensuelle False \n",
+ "\n",
+ " target_type_name \n",
+ "0 manual_static_filter \n",
+ "1 manual_static_filter \n",
+ "2 manual_static_filter \n",
+ "3 manual_static_filter \n",
+ "4 manual_static_filter \n",
+ "... ... \n",
+ "69253 manual_static_filter \n",
+ "69254 manual_static_filter \n",
+ "69255 manual_static_filter \n",
+ "69256 manual_static_filter \n",
+ "69257 manual_static_filter \n",
+ "\n",
+ "[69258 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "target_information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
+ "0 1799177 36984 409613 2 guichet \n",
+ "1 1799178 36984 409613 3 guichet \n",
+ "2 1799179 36984 409613 1 guichet \n",
+ "3 1799180 36984 409613 1 guichet \n",
+ "4 1799181 36984 409613 3 guichet \n",
+ "... ... ... ... ... ... \n",
+ "492309 3252232 621716 710062 1 guichet \n",
+ "492310 3252233 621716 710062 1 guichet \n",
+ "492311 3252234 621716 710062 1 guichet \n",
+ "492312 3252235 621716 710062 1 guichet \n",
+ "492313 3252236 621716 710062 1 guichet \n",
+ "\n",
+ " purchase_date amount is_full_price name_event_types \\\n",
+ "0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
+ "1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
+ "2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
+ "3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
+ "4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
+ "... ... ... ... ... \n",
+ "492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
+ "492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
+ "492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
+ "492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
+ "492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
+ "\n",
+ " name_facilities name_categories \\\n",
+ "0 le grand t abo t gourmand jeune \n",
+ "1 le grand t abo t gourmand jeune \n",
+ "2 le grand t abo t gourmand jeune \n",
+ "3 le grand t abo t gourmand jeune \n",
+ "4 la cite des congres abo t gourmand jeune \n",
+ "... ... ... \n",
+ "492309 cap nort tarif sco co 1 seance scolaire \n",
+ "492310 cap nort tarif sco co 1 seance scolaire \n",
+ "492311 cap nort tarif sco co 1 seance scolaire \n",
+ "492312 cap nort tarif sco co 1 seance scolaire \n",
+ "492313 cap nort tarif sco co 1 seance scolaire \n",
+ "\n",
+ " name_events name_seasons start_date_time \\\n",
+ "0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
+ "1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
+ "2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
+ "3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
+ "4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
+ "... ... ... ... \n",
+ "492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
+ "492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
+ "492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
+ "492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
+ "492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
+ "\n",
+ " end_date_time open \n",
+ "0 1901-01-01 00:09:21+00:09 True \n",
+ "1 1901-01-01 00:09:21+00:09 True \n",
+ "2 1901-01-01 00:09:21+00:09 True \n",
+ "3 1901-01-01 00:09:21+00:09 True \n",
+ "4 1901-01-01 00:09:21+00:09 True \n",
+ "... ... ... \n",
+ "492309 1901-01-01 00:09:21+00:09 True \n",
+ "492310 1901-01-01 00:09:21+00:09 True \n",
+ "492311 1901-01-01 00:09:21+00:09 True \n",
+ "492312 1901-01-01 00:09:21+00:09 True \n",
+ "492313 1901-01-01 00:09:21+00:09 True \n",
+ "\n",
+ "[492314 rows x 16 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "products_purchased_reduced"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
"metadata": {},
"outputs": [
@@ -124,7 +629,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -139,7 +644,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -154,7 +659,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -169,7 +674,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
":27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
@@ -181,21 +686,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
- " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
+ "File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
@@ -203,7 +694,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -218,7 +711,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -233,7 +726,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
":27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
@@ -252,7 +745,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -267,7 +760,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -282,9 +775,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
- "/tmp/ipykernel_1173/2987234667.py:8: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -299,7 +792,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
":27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
@@ -318,7 +811,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -333,7 +826,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -348,7 +841,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -363,7 +856,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
":27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
@@ -382,7 +875,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -397,7 +890,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -412,9 +905,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
- "/tmp/ipykernel_1173/2987234667.py:8: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
@@ -429,7 +922,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
+ "/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
":27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
@@ -642,164 +1135,20 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 1,
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- " customer_id street_id structure_id mcp_contact_id fidelity \\\n",
- "74896 1 139 NaN NaN 0 \n",
- "53006 1 312707 NaN NaN 0 \n",
- "142042 1 2772 NaN 3954.0 2 \n",
- "\n",
- " tenant_id is_partner deleted_at gender is_email_true ... \\\n",
- "74896 875 False NaN 2 False ... \n",
- "53006 1556 False NaN 0 True ... \n",
- "142042 862 False NaN 2 True ... \n",
- "\n",
- " purchase_count first_buying_date country gender_label \\\n",
- "74896 19 NaN NaN other \n",
- "53006 10 2018-12-26 13:06:49+00:00 fr female \n",
- "142042 2 2016-09-09 17:02:00+00:00 fr other \n",
- "\n",
- " gender_female gender_male gender_other country_fr has_tags \\\n",
- "74896 0 0 1 NaN 0 \n",
- "53006 1 0 0 1.0 0 \n",
- "142042 0 0 1 1.0 0 \n",
- "\n",
- " number_compagny \n",
- "74896 10 \n",
- "53006 11 \n",
- "142042 14 \n",
- "\n",
- "[3 rows x 29 columns]"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "NameError",
+ "evalue": "name 'customerplus_clean_spectacle' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcustomerplus_clean_spectacle\u001b[49m[customerplus_clean_spectacle[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomer_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'customerplus_clean_spectacle' is not defined"
+ ]
}
],
"source": [
@@ -3465,7 +3814,7 @@
},
{
"cell_type": "code",
- "execution_count": 223,
+ "execution_count": 8,
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
"metadata": {},
"outputs": [
@@ -3493,7 +3842,7 @@
"dtype: int64"
]
},
- "execution_count": 223,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -3651,6 +4000,20 @@
"products_purchased_reduced_spectacle.head()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1a95b8f-6539-48bd-b09d-6f8f63d25fb2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Number of campaigns sent and opened per company, with the share of campaigns opened\n",
+ "\n",
+ "company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
+ "company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
+ "company_campaigns_stats"
+ ]
+ },
{
"cell_type": "markdown",
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
@@ -3694,6 +4057,193 @@
"print(\"Nombre de lignes de la table : \",target_information_spectacle.shape[0])\n",
"target_information_spectacle.isna().sum()"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "985b6403-3c75-420e-a4a4-d3045213e9ef",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " id customer_id target_name \\\n",
+ "0 1165098 618562 Newsletter mensuelle \n",
+ "1 1165100 618559 Newsletter mensuelle \n",
+ "2 1165101 618561 Newsletter mensuelle \n",
+ "3 1165102 618560 Newsletter mensuelle \n",
+ "4 1165103 618558 Newsletter mensuelle \n",
+ "... ... ... ... \n",
+ "779653 4207082 6764876 INSCRIPTION NL VOYAGES HUMA \n",
+ "779654 4207083 6764877 Inscriptions newsletters (depuis 2019) \n",
+ "779655 4207084 6801322 Inscriptions newsletters (depuis 2019) \n",
+ "779656 4207085 6837768 Inscriptions newsletters (depuis 2019) \n",
+ "779657 4207086 6837769 Inscriptions newsletters (depuis 2019) \n",
+ "\n",
+ " target_type_is_import target_type_name number_compagny \n",
+ "0 False manual_static_filter 10 \n",
+ "1 False manual_static_filter 10 \n",
+ "2 False manual_static_filter 10 \n",
+ "3 False manual_static_filter 10 \n",
+ "4 False manual_static_filter 10 \n",
+ "... ... ... ... \n",
+ "779653 False manual_static_filter 14 \n",
+ "779654 False manual_static_filter 14 \n",
+ "779655 False manual_static_filter 14 \n",
+ "779656 False manual_static_filter 14 \n",
+ "779657 False manual_static_filter 14 \n",
+ "\n",
+ "[6240166 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "target_information_spectacle"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a180f0a-c6de-4e66-9ae8-fdbfdf8837c9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
diff --git a/code_base_train_test.ipynb b/code_base_train_test.ipynb
new file mode 100644
index 0000000..b7e6578
--- /dev/null
+++ b/code_base_train_test.ipynb
@@ -0,0 +1,814 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "bf34b03c-536f-4f93-93a5-e452552653aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdin",
+ "output_type": "stream",
+ "text": [
+ "Choisissez le type de compagnie : sport ? musique ? musee ? musique\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n",
+ "Couverture Company 10 : 2016-03-07 - 2023-09-25\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n",
+ "Couverture Company 11 : 2015-06-26 - 2023-11-08\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n",
+ "Couverture Company 12 : 2016-06-14 - 2023-11-08\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n",
+ "Couverture Company 13 : 2010-07-31 - 2023-11-08\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n",
+ "Couverture Company 14 : 1901-01-01 - 2023-11-08\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset test : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset train : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset test : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset train : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset test : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset train : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset test : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset train : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset test : SUCCESS\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n",
+ "File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n",
+ "Data filtering : SUCCESS\n",
+ "KPIs construction : SUCCESS\n",
+ "Explanatory variable construction : SUCCESS\n",
+ "Explained variable construction : SUCCESS\n",
+ "Exportation dataset train : SUCCESS\n",
+ "FIN DE LA GENERATION DES DATASETS : SUCCESS\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Business Data Challenge - Team 1\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import s3fs\n",
+ "import re\n",
+ "import warnings\n",
+ "from datetime import date, timedelta, datetime\n",
+ "\n",
+ "# Create filesystem object\n",
+ "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
+ "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
+ "\n",
+ "\n",
+ "# Import KPI construction functions\n",
+ "exec(open('0_KPI_functions.py').read())\n",
+ "\n",
+ "# Ignore warning\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+ "\n",
+ "def display_covering_time(df, company, datecover):\n",
+ " \"\"\"\n",
+    "    This function records and prints the purchase-date coverage of each company\n",
+ " \"\"\"\n",
+ " min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
+ " max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
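+    "    # Enumerate every calendar day between the company's first and last purchase dates\n",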
+ " datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
+ " print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
+ " return datecover\n",
+ "\n",
+ "\n",
+ "def compute_time_intersection(datecover):\n",
+ " \"\"\"\n",
+    "    This function returns the dates covered by all companies (the intersection of their coverage periods)\n",
+ " \"\"\"\n",
+ " timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
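+    "    # Keep only the days present in every company's coverage\n",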
+ " intersection = set.intersection(*timestamps_sets)\n",
+ " intersection_list = list(intersection)\n",
+ " formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
+ " return sorted(formated_dates)\n",
+ "\n",
+ "\n",
+ "def df_coverage_modelization(sport, coverage_train = 0.7):\n",
+ " \"\"\"\n",
+    "    This function returns start_date, end_of_features and final_date,\n",
+    "    the dates used to construct the train and test datasets\n",
+ " \"\"\"\n",
+ " datecover = {}\n",
+ " for company in sport:\n",
+ " df_products_purchased_reduced = display_databases(company, file_name = \"products_purchased_reduced\",\n",
+ " datetime_col = ['purchase_date'])\n",
+ " datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
+ " #print(datecover.keys())\n",
+ " dt_coverage = compute_time_intersection(datecover)\n",
+ " start_date = dt_coverage[0]\n",
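+    "    # The first coverage_train share of the common coverage ends the feature window;\n",
+    "    # the remaining dates up to final_date form the prediction window\n",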
+    "    end_of_features = dt_coverage[int(coverage_train * len(dt_coverage))]\n",
+ " final_date = dt_coverage[-1]\n",
+ " return start_date, end_of_features, final_date\n",
+ " \n",
+ "\n",
+ "def dataset_construction(min_date, end_features_date, max_date, directory_path):\n",
+ " \n",
+ " # Import customerplus\n",
+ " df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
+ " df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
+ " df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
+ " \n",
+    "    # Consistency filter for applying our method\n",
+ " max_date = pd.to_datetime(max_date, utc = True, format = 'ISO8601') \n",
+ " end_features_date = pd.to_datetime(end_features_date, utc = True, format = 'ISO8601')\n",
+ " min_date = pd.to_datetime(min_date, utc = True, format = 'ISO8601')\n",
+ "\n",
+    "    # Filter the df_campaigns_information table\n",
+ " df_campaigns_information = df_campaigns_information[(df_campaigns_information['sent_at'] <= end_features_date) & (df_campaigns_information['sent_at'] >= min_date)]\n",
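+    "    # Campaign opens recorded after the feature cutoff are masked (set to NaT)\n",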
+    "    df_campaigns_information.loc[df_campaigns_information['opened_at'] >= end_features_date, 'opened_at'] = np.datetime64('NaT')\n",
+ " \n",
+    "    # Filter df_products_purchased_reduced to the feature window, keeping the full\n",
+    "    # history aside so the prediction window (> end_features_date) stays available\n",
+    "    df_products_purchased_full = df_products_purchased_reduced\n",
+    "    df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]\n",
+ "\n",
+ " print(\"Data filtering : SUCCESS\")\n",
+ " \n",
+    "    # Merge all the tables and build the KPIs\n",
+ "\n",
+    "    # KPIs on marketing campaigns\n",
+ " df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
+ "\n",
+    "    # KPIs on purchasing behaviour\n",
+ " df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
+ "\n",
+    "    # KPIs on socio-demographic data\n",
+ " df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
+ " \n",
+ " print(\"KPIs construction : SUCCESS\")\n",
+ " \n",
+    "    # Merge with customer-related KPIs\n",
+ " df_customer = pd.merge(df_customerplus_clean, df_campaigns_kpi, on = 'customer_id', how = 'left')\n",
+ " \n",
+ " # Fill NaN values\n",
+ " df_customer[['nb_campaigns', 'nb_campaigns_opened']] = df_customer[['nb_campaigns', 'nb_campaigns_opened']].fillna(0)\n",
+ " \n",
+    "    # Merge with purchase-behaviour KPIs\n",
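+    "    # Outer join keeps customers present in either the ticket KPIs or the customer table\n",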
+ " df_customer_product = pd.merge(df_tickets_kpi, df_customer, on = 'customer_id', how = 'outer')\n",
+ " \n",
+ " # Fill NaN values\n",
+ " df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']] = df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']].fillna(0)\n",
+ "\n",
+ " print(\"Explanatory variable construction : SUCCESS\")\n",
+ "\n",
+ " # 2. Construction of the explained variable \n",
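+    "    # The target is built from purchases strictly after the feature cutoff, up to max_date\n",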
+    "    df_products_purchased_to_predict = df_products_purchased_full[(df_products_purchased_full['purchase_date'] <= max_date) & (df_products_purchased_full['purchase_date'] > end_features_date)]\n",
+ "\n",
+    "    # Purchase indicator\n",
+ " df_products_purchased_to_predict['y_has_purchased'] = 1\n",
+ "\n",
+ " y = df_products_purchased_to_predict[['customer_id', 'y_has_purchased']].drop_duplicates()\n",
+ "\n",
+ " print(\"Explained variable construction : SUCCESS\")\n",
+ " \n",
+ " # 3. Merge between explained and explanatory variables\n",
+ " dataset = pd.merge(df_customer_product, y, on = ['customer_id'], how = 'left')\n",
+ "\n",
+ " # 0 if there is no purchase\n",
+    "    dataset['y_has_purchased'] = dataset['y_has_purchased'].fillna(0)\n",
+ "\n",
+ " # add id_company prefix to customer_id\n",
+ " dataset['customer_id'] = directory_path + '_' + dataset['customer_id'].astype('str')\n",
+ " \n",
+ " return dataset\n",
+ "\n",
+ "## Exportation\n",
+ "\n",
+ "companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
+ " 'sport': ['5', '6', '7', '8', '9'],\n",
+ " 'musique' : ['10', '11', '12', '13', '14']}\n",
+ "\n",
+ "type_of_comp = input('Choisissez le type de compagnie : sport ? musique ? musee ?')\n",
+ "list_of_comp = companies[type_of_comp] \n",
+    "# Export folder\n",
+ "BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'\n",
+ "\n",
+    "# Create the test and train datasets for the selected companies\n",
+ "\n",
+ "start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)\n",
+ "\n",
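+    "# For each company, build and export the test set, then the train set\n",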
+ "for company in list_of_comp:\n",
+ " dataset_test = dataset_construction(min_date = start_date, end_features_date = end_of_features,\n",
+ " max_date = final_date, directory_path = company) \n",
+ "\n",
+ " # Exportation\n",
+ " FILE_KEY_OUT_S3 = \"dataset_test\" + company + \".csv\"\n",
+ " FILE_PATH_OUT_S3 = BUCKET_OUT + \"/Test_set/\" + FILE_KEY_OUT_S3\n",
+ " \n",
+ " with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
+ " dataset_test.to_csv(file_out, index = False)\n",
+ " \n",
+ " print(\"Exportation dataset test : SUCCESS\")\n",
+ "\n",
+ "# Dataset train\n",
+ " dataset_train = dataset_construction(min_date = start_date, end_features_date = end_of_features,\n",
+ " max_date = final_date, directory_path = company)\n",
+ " # Export\n",
+ " FILE_KEY_OUT_S3 = \"dataset_train\" + company + \".csv\" \n",
+ " FILE_PATH_OUT_S3 = BUCKET_OUT + \"/Train_test/\" + FILE_KEY_OUT_S3\n",
+ " \n",
+ " with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
+ " dataset_train.to_csv(file_out, index = False)\n",
+ " \n",
+ " print(\"Exportation dataset train : SUCCESS\")\n",
+ "\n",
+ "\n",
+ "print(\"FIN DE LA GENERATION DES DATASETS : SUCCESS\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "3721427e-5957-4556-b278-2e7ffca892f4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "FILE_PATH_OUT_S3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "a3bfeeb6-2db0-4f1d-866c-8721343e97c5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer_id 0.000000\n",
+ "nb_tickets 0.000000\n",
+ "nb_purchases 0.000000\n",
+ "total_amount 0.000000\n",
+ "nb_suppliers 0.000000\n",
+ "vente_internet_max 0.000000\n",
+ "purchase_date_min 0.858950\n",
+ "purchase_date_max 0.858950\n",
+ "time_between_purchase 0.858950\n",
+ "nb_tickets_internet 0.000000\n",
+ "street_id 0.000000\n",
+ "structure_id 0.869838\n",
+ "mcp_contact_id 0.276677\n",
+ "fidelity 0.000000\n",
+ "tenant_id 0.000000\n",
+ "is_partner 0.000000\n",
+ "deleted_at 1.000000\n",
+ "gender 0.000000\n",
+ "is_email_true 0.000000\n",
+ "opt_in 0.000000\n",
+ "last_buying_date 0.709626\n",
+ "max_price 0.709626\n",
+ "ticket_sum 0.000000\n",
+ "average_price 0.709626\n",
+ "average_purchase_delay 0.709731\n",
+ "average_price_basket 0.709731\n",
+ "average_ticket_basket 0.709731\n",
+ "total_price 0.000000\n",
+ "purchase_count 0.000000\n",
+ "first_buying_date 0.709626\n",
+ "country 0.152090\n",
+ "gender_label 0.000000\n",
+ "gender_female 0.000000\n",
+ "gender_male 0.000000\n",
+ "gender_other 0.000000\n",
+ "country_fr 0.152090\n",
+ "has_tags 0.000000\n",
+ "nb_campaigns 0.000000\n",
+ "nb_campaigns_opened 0.000000\n",
+ "time_to_open 0.848079\n",
+ "y_has_purchased 1.000000\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " dataset_train.isna().sum()/dataset_train.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a4c4e994-231b-4467-aa1b-0a5283c59dd5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
+ "0 14_1 2.0 2.0 70.0 1.0 \n",
+ "1 14_2 7.0 3.0 145.0 2.0 \n",
+ "2 14_3 2.0 2.0 70.0 1.0 \n",
+ "3 14_4 2.0 2.0 32.0 1.0 \n",
+ "4 14_5 2.0 2.0 70.0 1.0 \n",
+ "... ... ... ... ... ... \n",
+ "343121 14_6884748 0.0 0.0 0.0 0.0 \n",
+ "343122 14_6884749 0.0 0.0 0.0 0.0 \n",
+ "343123 14_6884750 0.0 0.0 0.0 0.0 \n",
+ "343124 14_6884751 0.0 0.0 0.0 0.0 \n",
+ "343125 14_6884753 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 1.0 1464.938449 1464.938021 \n",
+ "1 1.0 1466.030116 365.335000 \n",
+ "2 1.0 1476.907894 1476.907662 \n",
+ "3 1.0 1465.907894 1465.907465 \n",
+ "4 1.0 1465.373866 1465.373819 \n",
+ "... ... ... ... \n",
+ "343121 0.0 NaN NaN \n",
+ "343122 0.0 NaN NaN \n",
+ "343123 0.0 NaN NaN \n",
+ "343124 0.0 NaN NaN \n",
+ "343125 0.0 NaN NaN \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet ... gender_label \\\n",
+ "0 0.000428 2.0 ... other \n",
+ "1 1100.695116 7.0 ... male \n",
+ "2 0.000231 2.0 ... female \n",
+ "3 0.000428 2.0 ... male \n",
+ "4 0.000046 2.0 ... female \n",
+ "... ... ... ... ... \n",
+ "343121 NaN 0.0 ... male \n",
+ "343122 NaN 0.0 ... male \n",
+ "343123 NaN 0.0 ... male \n",
+ "343124 NaN 0.0 ... female \n",
+ "343125 NaN 0.0 ... male \n",
+ "\n",
+ " gender_female gender_male gender_other country_fr has_tags \\\n",
+ "0 0 0 1 1.0 0 \n",
+ "1 0 1 0 1.0 1 \n",
+ "2 1 0 0 1.0 0 \n",
+ "3 0 1 0 1.0 0 \n",
+ "4 1 0 0 1.0 0 \n",
+ "... ... ... ... ... ... \n",
+ "343121 0 1 0 1.0 0 \n",
+ "343122 0 1 0 1.0 0 \n",
+ "343123 0 1 0 1.0 0 \n",
+ "343124 1 0 0 1.0 0 \n",
+ "343125 0 1 0 1.0 0 \n",
+ "\n",
+ " nb_campaigns nb_campaigns_opened time_to_open \\\n",
+ "0 9.0 1.0 0 days 00:36:13 \n",
+ "1 9.0 4.0 0 days 02:30:09.250000 \n",
+ "2 6.0 1.0 0 days 20:58:45 \n",
+ "3 6.0 0.0 NaT \n",
+ "4 7.0 0.0 NaT \n",
+ "... ... ... ... \n",
+ "343121 0.0 0.0 NaT \n",
+ "343122 0.0 0.0 NaT \n",
+ "343123 0.0 0.0 NaT \n",
+ "343124 0.0 0.0 NaT \n",
+ "343125 0.0 0.0 NaT \n",
+ "\n",
+ " y_has_purchased \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "... ... \n",
+ "343121 NaN \n",
+ "343122 NaN \n",
+ "343123 NaN \n",
+ "343124 NaN \n",
+ "343125 NaN \n",
+ "\n",
+ "[343126 rows x 41 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75f9a672-641f-49a2-a8d6-7673845506f5",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}