{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import s3fs\n",
    "import warnings\n",
    "from datetime import date, timedelta, datetime\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37977b4e-42e7-4d8e-8b9a-6843292fd128",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import KPI construction functions by executing the helper script in this\n",
    "# notebook's namespace. The context manager closes the file handle as soon as\n",
    "# the source text has been read (a bare open(...).read() leaves it to the GC).\n",
    "with open('0_KPI_functions.py') as kpi_source:\n",
    "    exec(kpi_source.read())\n",
    "# Alternative path kept from the original notebook:\n",
    "# exec(open('../0_KPI_functions.py').read())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "cca62d72-f809-41a9-bb06-1be7d6b09307",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
       " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
       " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
       " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create filesystem object.\n",
    "# os.environ[...] raises KeyError if AWS_S3_ENDPOINT is not set in the environment.\n",
    "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
    "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
    "\n",
    "# Here BUCKET holds the full folder path (bucket name + prefix) that is listed below.\n",
    "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
    "fs.ls(BUCKET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_438/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. 
Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
      " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n"
     ]
    }
   ],
   "source": [
    "# Location of the purchases extract on S3 (bucket name + object key).\n",
    "# NOTE: this rebinds BUCKET, which an earlier cell set to the Company_10 folder path.\n",
    "BUCKET = \"projet-bdc2324-team1\"\n",
    "FILE_KEY_S3 = \"0_Input/Company_10/products_purchased_reduced.csv\"\n",
    "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
    "\n",
    "# NOTE(review): `date_parser` is deprecated in pandas (see the FutureWarning in this\n",
    "# cell's output). `custom_date_parser` is not defined in this notebook -- presumably it\n",
    "# comes from 0_KPI_functions.py; confirm what it does before migrating to `date_format`\n",
    "# or to a post-load `pd.to_datetime` call.\n",
    "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
    "    purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "637aa400-f49a-4d8d-802a-868b241f8a9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load each Company_10 input table and bind it, via globals(), to a notebook-level\n",
    "# variable named after the table (e.g. `target_information`).\n",
    "# NOTE: 'products_purchased_reduced' is read again here, this time without\n",
    "# parse_dates, in addition to the `purchases` frame loaded in the previous cell.\n",
    "dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
    "for nom_base in dic_base:\n",
    "    FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
    "    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
    "        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "e60529b5-986f-4685-91e1-782c2b022e09",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | id | \n", "customer_id | \n", "target_name | \n", "target_type_is_import | \n", "target_type_name | \n", "
---|---|---|---|---|---|
0 | \n", "1165098 | \n", "618562 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
1 | \n", "1165100 | \n", "618559 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
2 | \n", "1165101 | \n", "618561 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
3 | \n", "1165102 | \n", "618560 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
4 | \n", "1165103 | \n", "618558 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
69253 | \n", "1698158 | \n", "18580 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69254 | \n", "1698159 | \n", "18569 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69255 | \n", "1698160 | \n", "2962 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69256 | \n", "1698161 | \n", "3825 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69257 | \n", "1698162 | \n", "5731 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69258 rows × 5 columns
\n", "\n", " | ticket_id | \n", "customer_id | \n", "purchase_id | \n", "event_type_id | \n", "supplier_name | \n", "purchase_date | \n", "amount | \n", "is_full_price | \n", "name_event_types | \n", "name_facilities | \n", "name_categories | \n", "name_events | \n", "name_seasons | \n", "start_date_time | \n", "end_date_time | \n", "open | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1799177 | \n", "36984 | \n", "409613 | \n", "2 | \n", "guichet | \n", "2016-04-28 15:58:26+00:00 | \n", "9.0 | \n", "False | \n", "danse | \n", "le grand t | \n", "abo t gourmand jeune | \n", "aringa rossa | \n", "test 2016/2017 | \n", "2016-09-27 00:00:00+02:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
1 | \n", "1799178 | \n", "36984 | \n", "409613 | \n", "3 | \n", "guichet | \n", "2016-04-28 15:58:26+00:00 | \n", "9.0 | \n", "False | \n", "cirque | \n", "le grand t | \n", "abo t gourmand jeune | \n", "5èmes hurlants | \n", "test 2016/2017 | \n", "2016-11-18 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
2 | \n", "1799179 | \n", "36984 | \n", "409613 | \n", "1 | \n", "guichet | \n", "2016-04-28 15:58:26+00:00 | \n", "9.0 | \n", "False | \n", "théâtre | \n", "le grand t | \n", "abo t gourmand jeune | \n", "dom juan | \n", "test 2016/2017 | \n", "2016-12-07 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
3 | \n", "1799180 | \n", "36984 | \n", "409613 | \n", "1 | \n", "guichet | \n", "2016-04-28 15:58:26+00:00 | \n", "9.0 | \n", "False | \n", "théâtre | \n", "le grand t | \n", "abo t gourmand jeune | \n", "vanishing point | \n", "test 2016/2017 | \n", "2017-01-04 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
4 | \n", "1799181 | \n", "36984 | \n", "409613 | \n", "3 | \n", "guichet | \n", "2016-04-28 15:58:26+00:00 | \n", "12.0 | \n", "False | \n", "cirque | \n", "la cite des congres | \n", "abo t gourmand jeune | \n", "a o lang pho | \n", "test 2016/2017 | \n", "2017-01-03 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
492309 | \n", "3252232 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 11:08:45+00:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492310 | \n", "3252233 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 11:08:45+00:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492311 | \n", "3252234 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 11:08:45+00:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492312 | \n", "3252235 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 11:08:45+00:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492313 | \n", "3252236 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 11:08:45+00:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492314 rows × 16 columns
\n", "\n", " | id | \n", "customer_id | \n", "target_name | \n", "target_type_is_import | \n", "target_type_name | \n", "
---|---|---|---|---|---|
0 | \n", "1165098 | \n", "618562 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
1 | \n", "1165100 | \n", "618559 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
2 | \n", "1165101 | \n", "618561 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
3 | \n", "1165102 | \n", "618560 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
4 | \n", "1165103 | \n", "618558 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
69253 | \n", "1698158 | \n", "18580 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69254 | \n", "1698159 | \n", "18569 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69255 | \n", "1698160 | \n", "2962 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69256 | \n", "1698161 | \n", "3825 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69257 | \n", "1698162 | \n", "5731 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69258 rows × 5 columns
\n", "