diff --git a/Spectacle/Exploration_spectacle.ipynb b/Spectacle/Exploration_spectacle.ipynb index 9fa6532..841d297 100644 --- a/Spectacle/Exploration_spectacle.ipynb +++ b/Spectacle/Exploration_spectacle.ipynb @@ -17,30 +17,805 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "37977b4e-42e7-4d8e-8b9a-6843292fd128", "metadata": {}, "outputs": [], "source": [ "# Import KPI construction functions\n", - "# exec(open('0_KPI_functions.py').read())\n", - "exec(open('../0_KPI_functions.py').read())\n" + "exec(open('0_KPI_functions.py').read())\n", + "# exec(open('../0_KPI_functions.py').read())\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "cca62d72-f809-41a9-bb06-1be7d6b09307", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", "\n", - "BUCKET = \"bdc2324-data\"\n", + "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n", "fs.ls(BUCKET)" ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_438/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", + " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n" + ] + } + ], + "source": [ + "BUCKET = \"projet-bdc2324-team1\"\n", + "FILE_KEY_S3 = \"0_Input/Company_10/products_purchased_reduced.csv\"\n", + "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "637aa400-f49a-4d8d-802a-868b241f8a9d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n", + "for nom_base in dic_base:\n", + " FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n", + " with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "e60529b5-986f-4685-91e1-782c2b022e09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcustomer_idtarget_nametarget_type_is_importtarget_type_name
01165098618562Newsletter mensuelleFalsemanual_static_filter
11165100618559Newsletter mensuelleFalsemanual_static_filter
21165101618561Newsletter mensuelleFalsemanual_static_filter
31165102618560Newsletter mensuelleFalsemanual_static_filter
41165103618558Newsletter mensuelleFalsemanual_static_filter
..................
69253169815818580Newsletter mensuelleFalsemanual_static_filter
69254169815918569Newsletter mensuelleFalsemanual_static_filter
6925516981602962Newsletter mensuelleFalsemanual_static_filter
6925616981613825Newsletter mensuelleFalsemanual_static_filter
6925716981625731Newsletter mensuelleFalsemanual_static_filter
\n", + "

69258 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id customer_id target_name target_type_is_import \\\n", + "0 1165098 618562 Newsletter mensuelle False \n", + "1 1165100 618559 Newsletter mensuelle False \n", + "2 1165101 618561 Newsletter mensuelle False \n", + "3 1165102 618560 Newsletter mensuelle False \n", + "4 1165103 618558 Newsletter mensuelle False \n", + "... ... ... ... ... \n", + "69253 1698158 18580 Newsletter mensuelle False \n", + "69254 1698159 18569 Newsletter mensuelle False \n", + "69255 1698160 2962 Newsletter mensuelle False \n", + "69256 1698161 3825 Newsletter mensuelle False \n", + "69257 1698162 5731 Newsletter mensuelle False \n", + "\n", + " target_type_name \n", + "0 manual_static_filter \n", + "1 manual_static_filter \n", + "2 manual_static_filter \n", + "3 manual_static_filter \n", + "4 manual_static_filter \n", + "... ... \n", + "69253 manual_static_filter \n", + "69254 manual_static_filter \n", + "69255 manual_static_filter \n", + "69256 manual_static_filter \n", + "69257 manual_static_filter \n", + "\n", + "[69258 rows x 5 columns]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_information" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ticket_idcustomer_idpurchase_idevent_type_idsupplier_namepurchase_dateamountis_full_pricename_event_typesname_facilitiesname_categoriesname_eventsname_seasonsstart_date_timeend_date_timeopen
01799177369844096132guichet2016-04-28 15:58:26+00:009.0Falsedansele grand tabo t gourmand jeunearinga rossatest 2016/20172016-09-27 00:00:00+02:001901-01-01 00:09:21+00:09True
11799178369844096133guichet2016-04-28 15:58:26+00:009.0Falsecirquele grand tabo t gourmand jeune5èmes hurlantstest 2016/20172016-11-18 00:00:00+01:001901-01-01 00:09:21+00:09True
21799179369844096131guichet2016-04-28 15:58:26+00:009.0Falsethéâtrele grand tabo t gourmand jeunedom juantest 2016/20172016-12-07 00:00:00+01:001901-01-01 00:09:21+00:09True
31799180369844096131guichet2016-04-28 15:58:26+00:009.0Falsethéâtrele grand tabo t gourmand jeunevanishing pointtest 2016/20172017-01-04 00:00:00+01:001901-01-01 00:09:21+00:09True
41799181369844096133guichet2016-04-28 15:58:26+00:0012.0Falsecirquela cite des congresabo t gourmand jeunea o lang photest 2016/20172017-01-03 00:00:00+01:001901-01-01 00:09:21+00:09True
...................................................
49230932522326217167100621guichet2023-03-09 11:08:45+00:007.0Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09True
49231032522336217167100621guichet2023-03-09 11:08:45+00:007.0Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09True
49231132522346217167100621guichet2023-03-09 11:08:45+00:007.0Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09True
49231232522356217167100621guichet2023-03-09 11:08:45+00:007.0Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09True
49231332522366217167100621guichet2023-03-09 11:08:45+00:007.0Falsethéâtrecap norttarif sco co 1 seance scolairesur moi, le temps2022/20232023-03-13 14:00:00+01:001901-01-01 00:09:21+00:09True
\n", + "

492314 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " ticket_id customer_id purchase_id event_type_id supplier_name \\\n", + "0 1799177 36984 409613 2 guichet \n", + "1 1799178 36984 409613 3 guichet \n", + "2 1799179 36984 409613 1 guichet \n", + "3 1799180 36984 409613 1 guichet \n", + "4 1799181 36984 409613 3 guichet \n", + "... ... ... ... ... ... \n", + "492309 3252232 621716 710062 1 guichet \n", + "492310 3252233 621716 710062 1 guichet \n", + "492311 3252234 621716 710062 1 guichet \n", + "492312 3252235 621716 710062 1 guichet \n", + "492313 3252236 621716 710062 1 guichet \n", + "\n", + " purchase_date amount is_full_price name_event_types \\\n", + "0 2016-04-28 15:58:26+00:00 9.0 False danse \n", + "1 2016-04-28 15:58:26+00:00 9.0 False cirque \n", + "2 2016-04-28 15:58:26+00:00 9.0 False théâtre \n", + "3 2016-04-28 15:58:26+00:00 9.0 False théâtre \n", + "4 2016-04-28 15:58:26+00:00 12.0 False cirque \n", + "... ... ... ... ... \n", + "492309 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", + "492310 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", + "492311 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", + "492312 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", + "492313 2023-03-09 11:08:45+00:00 7.0 False théâtre \n", + "\n", + " name_facilities name_categories \\\n", + "0 le grand t abo t gourmand jeune \n", + "1 le grand t abo t gourmand jeune \n", + "2 le grand t abo t gourmand jeune \n", + "3 le grand t abo t gourmand jeune \n", + "4 la cite des congres abo t gourmand jeune \n", + "... ... ... \n", + "492309 cap nort tarif sco co 1 seance scolaire \n", + "492310 cap nort tarif sco co 1 seance scolaire \n", + "492311 cap nort tarif sco co 1 seance scolaire \n", + "492312 cap nort tarif sco co 1 seance scolaire \n", + "492313 cap nort tarif sco co 1 seance scolaire \n", + "\n", + " name_events name_seasons start_date_time \\\n", + "0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n", + "1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n", + "2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n", + "3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n", + "4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n", + "... ... ... ... \n", + "492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", + "492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", + "492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", + "492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", + "492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n", + "\n", + " end_date_time open \n", + "0 1901-01-01 00:09:21+00:09 True \n", + "1 1901-01-01 00:09:21+00:09 True \n", + "2 1901-01-01 00:09:21+00:09 True \n", + "3 1901-01-01 00:09:21+00:09 True \n", + "4 1901-01-01 00:09:21+00:09 True \n", + "... ... ... \n", + "492309 1901-01-01 00:09:21+00:09 True \n", + "492310 1901-01-01 00:09:21+00:09 True \n", + "492311 1901-01-01 00:09:21+00:09 True \n", + "492312 1901-01-01 00:09:21+00:09 True \n", + "492313 1901-01-01 00:09:21+00:09 True \n", + "\n", + "[492314 rows x 16 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "purchases" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "484979cc-d4a4-4d9d-9701-71a4f353a372", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_438/1359829443.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", + " campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n" + ] + } + ], + "source": [ + "BUCKET = \"projet-bdc2324-team1\"\n", + "FILE_KEY_S3 = \"0_Input/Company_10/campaigns_information.csv\"\n", + "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = [\"opened_at\", \"sent_at\", \"delivered_at\"], date_parser=custom_date_parser)\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "553ca2e7-ead4-4508-8247-fcc602abd249", + "metadata": {}, + "outputs": [], + "source": [ + "BUCKET = \"projet-bdc2324-team1\"\n", + "FILE_KEY_S3 = \"0_Input/Company_10/target_information.csv\"\n", + "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " targets = pd.read_csv(file_in, sep=\",\")\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "17b89ca1-deea-4139-a6c0-7822cc4e7a90", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcustomer_idtarget_nametarget_type_is_importtarget_type_name
01165098618562Newsletter mensuelleFalsemanual_static_filter
11165100618559Newsletter mensuelleFalsemanual_static_filter
21165101618561Newsletter mensuelleFalsemanual_static_filter
31165102618560Newsletter mensuelleFalsemanual_static_filter
41165103618558Newsletter mensuelleFalsemanual_static_filter
..................
69253169815818580Newsletter mensuelleFalsemanual_static_filter
69254169815918569Newsletter mensuelleFalsemanual_static_filter
6925516981602962Newsletter mensuelleFalsemanual_static_filter
6925616981613825Newsletter mensuelleFalsemanual_static_filter
6925716981625731Newsletter mensuelleFalsemanual_static_filter
\n", + "

69258 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id customer_id target_name target_type_is_import \\\n", + "0 1165098 618562 Newsletter mensuelle False \n", + "1 1165100 618559 Newsletter mensuelle False \n", + "2 1165101 618561 Newsletter mensuelle False \n", + "3 1165102 618560 Newsletter mensuelle False \n", + "4 1165103 618558 Newsletter mensuelle False \n", + "... ... ... ... ... \n", + "69253 1698158 18580 Newsletter mensuelle False \n", + "69254 1698159 18569 Newsletter mensuelle False \n", + "69255 1698160 2962 Newsletter mensuelle False \n", + "69256 1698161 3825 Newsletter mensuelle False \n", + "69257 1698162 5731 Newsletter mensuelle False \n", + "\n", + " target_type_name \n", + "0 manual_static_filter \n", + "1 manual_static_filter \n", + "2 manual_static_filter \n", + "3 manual_static_filter \n", + "4 manual_static_filter \n", + "... ... \n", + "69253 manual_static_filter \n", + "69254 manual_static_filter \n", + "69255 manual_static_filter \n", + "69256 manual_static_filter \n", + "69257 manual_static_filter \n", + "\n", + "[69258 rows x 5 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "targets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c90d94ab-cf0e-4d18-9d5e-cb1d22f4d58b", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {