{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "company10-intro",
   "metadata": {},
   "source": [
    "# Company 10: load the input tables\n",
    "\n",
    "Reads the Company 10 CSV files from S3 into pandas DataFrames and previews them.\n",
    "\n",
    "Requirements:\n",
    "- the `AWS_S3_ENDPOINT` environment variable must be set;\n",
    "- `0_KPI_functions.py` must be in the working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import s3fs\n",
    "import warnings\n",
    "from datetime import date, timedelta, datetime\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37977b4e-42e7-4d8e-8b9a-6843292fd128",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import KPI construction functions.\n",
    "# exec() runs the file in this notebook's namespace; custom_date_parser, used by the\n",
    "# loading helper below, is presumably defined there (it is not defined in this notebook).\n",
    "# Alternative location previously used: \"../0_KPI_functions.py\".\n",
    "KPI_FUNCTIONS_PATH = \"0_KPI_functions.py\"\n",
    "\n",
    "# Context manager so the file handle is closed; Python sources are UTF-8 by default.\n",
    "with open(KPI_FUNCTIONS_PATH, encoding=\"utf-8\") as kpi_source:\n",
    "    exec(kpi_source.read())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "s3-access-header",
   "metadata": {},
   "source": [
    "## S3 access"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cca62d72-f809-41a9-bb06-1be7d6b09307",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create filesystem object\n",
    "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
    "fs = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": S3_ENDPOINT_URL})\n",
    "\n",
    "# One name per concept: the bucket, and the company folder inside it.\n",
    "BUCKET = \"projet-bdc2324-team1\"\n",
    "COMPANY_DIR = BUCKET + \"/0_Input/Company_10\"\n",
    "fs.ls(COMPANY_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load-helper-def",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_company_table(table_name, date_columns=()):\n",
    "    \"\"\"Read ``<COMPANY_DIR>/<table_name>.csv`` from S3 into a DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    table_name : str\n",
    "        CSV file name without the ``.csv`` extension.\n",
    "    date_columns : iterable of str, optional\n",
    "        Columns to convert with ``custom_date_parser`` after reading.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The table, with ``date_columns`` converted.\n",
    "    \"\"\"\n",
    "    table_path = f\"{COMPANY_DIR}/{table_name}.csv\"\n",
    "    with fs.open(table_path, mode=\"rb\") as file_in:\n",
    "        table = pd.read_csv(file_in, sep=\",\")\n",
    "    # read_csv's `date_parser` argument is deprecated (FutureWarning since pandas 2.0),\n",
    "    # so date columns are read as text and converted afterwards.\n",
    "    # NOTE(review): assumes custom_date_parser accepts a whole column (Series);\n",
    "    # confirm against 0_KPI_functions.py.\n",
    "    for column in date_columns:\n",
    "        table[column] = custom_date_parser(table[column])\n",
    "    return table"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-tables-header",
   "metadata": {},
   "source": [
    "## Load the tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "purchases = load_company_table(\"products_purchased_reduced\", date_columns=[\"purchase_date\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "637aa400-f49a-4d8d-802a-868b241f8a9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw tables, read without any date conversion.\n",
    "RAW_TABLE_NAMES = [\n",
    "    \"campaigns_information\",\n",
    "    \"customerplus_cleaned\",\n",
    "    \"products_purchased_reduced\",\n",
    "    \"target_information\",\n",
    "]\n",
    "raw_tables = {name: load_company_table(name) for name in RAW_TABLE_NAMES}\n",
    "\n",
    "# Bind the historical variable names explicitly instead of writing into globals().\n",
    "campaigns_information = raw_tables[\"campaigns_information\"]\n",
    "customerplus_cleaned = raw_tables[\"customerplus_cleaned\"]\n",
    "products_purchased_reduced = raw_tables[\"products_purchased_reduced\"]\n",
    "target_information = raw_tables[\"target_information\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e60529b5-986f-4685-91e1-782c2b022e09",
   "metadata": {},
   "outputs": [],
   "source": [
    "target_information.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ece1bb3-5a2d-41f8-be96-eb70697881dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "purchases.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "484979cc-d4a4-4d9d-9701-71a4f353a372",
   "metadata": {},
   "outputs": [],
   "source": [
    "campaigns = load_company_table(\n",
    "    \"campaigns_information\",\n",
    "    date_columns=[\"opened_at\", \"sent_at\", \"delivered_at\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "553ca2e7-ead4-4508-8247-fcc602abd249",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same file as target_information: take an independent copy instead of downloading it again.\n",
    "targets = target_information.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17b89ca1-deea-4139-a6c0-7822cc4e7a90",
   "metadata": {},
   "outputs": [],
   "source": [
    "targets.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}