{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "0eefb67b-5399-44fa-9c1c-7724ec1c7cd2", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import s3fs\n", "import warnings\n", "from datetime import date, timedelta, datetime\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "id": "37977b4e-42e7-4d8e-8b9a-6843292fd128", "metadata": {}, "outputs": [], "source": [ "# Load the KPI construction functions into the notebook namespace.\n", "# Use '../0_KPI_functions.py' instead when the notebook is run from a subfolder.\n", "KPI_FUNCTIONS_PATH = '0_KPI_functions.py'\n", "\n", "# The with-block closes the file handle, and compiling with the file name\n", "# makes tracebacks point at the KPI file rather than an anonymous string.\n", "with open(KPI_FUNCTIONS_PATH, encoding='utf-8') as kpi_source:\n", "    exec(compile(kpi_source.read(), KPI_FUNCTIONS_PATH, 'exec'))\n" ] }, { "cell_type": "code", "execution_count": 70, "id": "cca62d72-f809-41a9-bb06-1be7d6b09307", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create the S3 filesystem object; the endpoint host is read from the\n", "# AWS_S3_ENDPOINT environment variable (a KeyError is raised if it is unset).\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", "\n", "# Input folder of Company 10; list its content to check that it is reachable.\n", "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n", "fs.ls(BUCKET)" ] }, { "cell_type": "code", "execution_count": 71, "id": "68fb54f3-8eb3-4cd0-966b-000876912fb5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ticket_id | \n", "customer_id | \n", "purchase_id | \n", "event_type_id | \n", "supplier_name | \n", "purchase_date | \n", "amount | \n", "is_full_price | \n", "name_event_types | \n", "name_facilities | \n", "name_categories | \n", "name_events | \n", "name_seasons | \n", "start_date_time | \n", "end_date_time | \n", "open | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1799177 | \n", "36984 | \n", "409613 | \n", "2 | \n", "guichet | \n", "2016-04-28 17:58:26+02:00 | \n", "9.0 | \n", "False | \n", "danse | \n", "le grand t | \n", "abo t gourmand jeune | \n", "aringa rossa | \n", "test 2016/2017 | \n", "2016-09-27 00:00:00+02:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
1 | \n", "1799178 | \n", "36984 | \n", "409613 | \n", "3 | \n", "guichet | \n", "2016-04-28 17:58:26+02:00 | \n", "9.0 | \n", "False | \n", "cirque | \n", "le grand t | \n", "abo t gourmand jeune | \n", "5èmes hurlants | \n", "test 2016/2017 | \n", "2016-11-18 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
2 | \n", "1799179 | \n", "36984 | \n", "409613 | \n", "1 | \n", "guichet | \n", "2016-04-28 17:58:26+02:00 | \n", "9.0 | \n", "False | \n", "théâtre | \n", "le grand t | \n", "abo t gourmand jeune | \n", "dom juan | \n", "test 2016/2017 | \n", "2016-12-07 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
3 | \n", "1799180 | \n", "36984 | \n", "409613 | \n", "1 | \n", "guichet | \n", "2016-04-28 17:58:26+02:00 | \n", "9.0 | \n", "False | \n", "théâtre | \n", "le grand t | \n", "abo t gourmand jeune | \n", "vanishing point | \n", "test 2016/2017 | \n", "2017-01-04 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
4 | \n", "1799181 | \n", "36984 | \n", "409613 | \n", "3 | \n", "guichet | \n", "2016-04-28 17:58:26+02:00 | \n", "12.0 | \n", "False | \n", "cirque | \n", "la cite des congres | \n", "abo t gourmand jeune | \n", "a o lang pho | \n", "test 2016/2017 | \n", "2017-01-03 00:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
492309 | \n", "3252232 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 12:08:45+01:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492310 | \n", "3252233 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 12:08:45+01:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492311 | \n", "3252234 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 12:08:45+01:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492312 | \n", "3252235 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 12:08:45+01:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492313 | \n", "3252236 | \n", "621716 | \n", "710062 | \n", "1 | \n", "guichet | \n", "2023-03-09 12:08:45+01:00 | \n", "7.0 | \n", "False | \n", "théâtre | \n", "cap nort | \n", "tarif sco co 1 seance scolaire | \n", "sur moi, le temps | \n", "2022/2023 | \n", "2023-03-13 14:00:00+01:00 | \n", "1901-01-01 00:09:21+00:09 | \n", "True | \n", "
492314 rows × 16 columns
\n", "\n", " | id | \n", "customer_id | \n", "target_name | \n", "target_type_is_import | \n", "target_type_name | \n", "
---|---|---|---|---|---|
0 | \n", "1165098 | \n", "618562 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
1 | \n", "1165100 | \n", "618559 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
2 | \n", "1165101 | \n", "618561 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
3 | \n", "1165102 | \n", "618560 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
4 | \n", "1165103 | \n", "618558 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
69253 | \n", "1698158 | \n", "18580 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69254 | \n", "1698159 | \n", "18569 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69255 | \n", "1698160 | \n", "2962 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69256 | \n", "1698161 | \n", "3825 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69257 | \n", "1698162 | \n", "5731 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69258 rows × 5 columns
\n", "\n", " | customer_id | \n", "nb_campaigns | \n", "nb_campaigns_opened | \n", "time_to_open | \n", "
---|---|---|---|---|
0 | \n", "29 | \n", "4 | \n", "NaN | \n", "NaT | \n", "
1 | \n", "37 | \n", "3 | \n", "NaN | \n", "NaT | \n", "
2 | \n", "39 | \n", "4 | \n", "1.0 | \n", "0 days 05:16:38 | \n", "
3 | \n", "41 | \n", "4 | \n", "1.0 | \n", "0 days 01:12:29 | \n", "
4 | \n", "44 | \n", "4 | \n", "NaN | \n", "NaT | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
57138 | \n", "827940 | \n", "1 | \n", "NaN | \n", "NaT | \n", "
57139 | \n", "827941 | \n", "1 | \n", "NaN | \n", "NaT | \n", "
57140 | \n", "827942 | \n", "1 | \n", "NaN | \n", "NaT | \n", "
57141 | \n", "827943 | \n", "1 | \n", "NaN | \n", "NaT | \n", "
57142 | \n", "827944 | \n", "1 | \n", "NaN | \n", "NaT | \n", "
57143 rows × 4 columns
\n", "\n", " | customer_id | \n", "nb_tickets | \n", "nb_purchases | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "time_between_purchase | \n", "nb_tickets_internet | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "19482 | \n", "88 | \n", "29 | \n", "872.0 | \n", "2 | \n", "1 | \n", "2643.092500 | \n", "718.149398 | \n", "1924.943102 | \n", "8.0 | \n", "
1 | \n", "19484 | \n", "3 | \n", "2 | \n", "62.0 | \n", "1 | \n", "0 | \n", "1745.021736 | \n", "1743.045035 | \n", "1.976701 | \n", "0.0 | \n", "
2 | \n", "19485 | \n", "131 | \n", "21 | \n", "1878.0 | \n", "2 | \n", "1 | \n", "2649.044745 | \n", "85.240845 | \n", "2563.803900 | \n", "84.0 | \n", "
3 | \n", "19486 | \n", "10 | \n", "4 | \n", "96.0 | \n", "1 | \n", "0 | \n", "1944.077604 | \n", "1742.794225 | \n", "201.283380 | \n", "0.0 | \n", "
4 | \n", "19487 | \n", "2 | \n", "1 | \n", "33.0 | \n", "1 | \n", "0 | \n", "1742.877766 | \n", "1742.877766 | \n", "0.000000 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
26100 | \n", "824877 | \n", "1 | \n", "1 | \n", "-12.0 | \n", "1 | \n", "0 | \n", "5.956111 | \n", "5.956111 | \n", "0.000000 | \n", "0.0 | \n", "
26101 | \n", "824878 | \n", "1 | \n", "1 | \n", "12.0 | \n", "1 | \n", "0 | \n", "5.956921 | \n", "5.956921 | \n", "0.000000 | \n", "0.0 | \n", "
26102 | \n", "824879 | \n", "2 | \n", "1 | \n", "-38.0 | \n", "1 | \n", "0 | \n", "5.226238 | \n", "5.226238 | \n", "0.000000 | \n", "0.0 | \n", "
26103 | \n", "824991 | \n", "14 | \n", "3 | \n", "-100.0 | \n", "1 | \n", "0 | \n", "3.021539 | \n", "3.017222 | \n", "0.004317 | \n", "0.0 | \n", "
26104 | \n", "824998 | \n", "1 | \n", "1 | \n", "25.0 | \n", "1 | \n", "0 | \n", "0.072720 | \n", "0.072720 | \n", "0.000000 | \n", "0.0 | \n", "
26105 rows × 10 columns
\n", "\n", " | customer_id | \n", "street_id | \n", "structure_id | \n", "mcp_contact_id | \n", "fidelity | \n", "tenant_id | \n", "is_partner | \n", "deleted_at | \n", "gender | \n", "is_email_true | \n", "... | \n", "total_price | \n", "purchase_count | \n", "first_buying_date | \n", "country | \n", "gender_label | \n", "gender_female | \n", "gender_male | \n", "gender_other | \n", "country_fr | \n", "has_tags | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "821538 | \n", "139 | \n", "NaN | \n", "NaN | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "NaN | \n", "0 | \n", "
1 | \n", "809126 | \n", "1063 | \n", "NaN | \n", "NaN | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "fr | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "1.0 | \n", "0 | \n", "
2 | \n", "11005 | \n", "1063 | \n", "NaN | \n", "NaN | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "False | \n", "... | \n", "NaN | \n", "14 | \n", "NaN | \n", "fr | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "1.0 | \n", "0 | \n", "
3 | \n", "17663 | \n", "12731 | \n", "NaN | \n", "NaN | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "0 | \n", "False | \n", "... | \n", "NaN | \n", "1 | \n", "NaN | \n", "fr | \n", "female | \n", "1 | \n", "0 | \n", "0 | \n", "1.0 | \n", "0 | \n", "
4 | \n", "38100 | \n", "12395 | \n", "NaN | \n", "NaN | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "0 | \n", "True | \n", "... | \n", "NaN | \n", "1 | \n", "NaN | \n", "fr | \n", "female | \n", "1 | \n", "0 | \n", "0 | \n", "1.0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
98789 | \n", "766266 | \n", "139 | \n", "NaN | \n", "181304.0 | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "NaN | \n", "0 | \n", "
98790 | \n", "766336 | \n", "139 | \n", "NaN | \n", "178189.0 | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "NaN | \n", "0 | \n", "
98791 | \n", "766348 | \n", "139 | \n", "NaN | \n", "178141.0 | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "NaN | \n", "0 | \n", "
98792 | \n", "766363 | \n", "139 | \n", "NaN | \n", "176807.0 | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "NaN | \n", "0 | \n", "
98793 | \n", "766366 | \n", "139 | \n", "NaN | \n", "176788.0 | \n", "0 | \n", "875 | \n", "False | \n", "NaN | \n", "2 | \n", "True | \n", "... | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "other | \n", "0 | \n", "0 | \n", "1 | \n", "NaN | \n", "0 | \n", "
98794 rows × 28 columns
\n", "\n", " | id | \n", "customer_id | \n", "target_name | \n", "target_type_is_import | \n", "target_type_name | \n", "
---|---|---|---|---|---|
0 | \n", "1165098 | \n", "618562 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
1 | \n", "1165100 | \n", "618559 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
2 | \n", "1165101 | \n", "618561 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
3 | \n", "1165102 | \n", "618560 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
4 | \n", "1165103 | \n", "618558 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
69253 | \n", "1698158 | \n", "18580 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69254 | \n", "1698159 | \n", "18569 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69255 | \n", "1698160 | \n", "2962 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69256 | \n", "1698161 | \n", "3825 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69257 | \n", "1698162 | \n", "5731 | \n", "Newsletter mensuelle | \n", "False | \n", "manual_static_filter | \n", "
69258 rows × 5 columns
\n", "