diff --git a/Spectacle/Exploration_spectacle.ipynb b/Spectacle/Exploration_spectacle.ipynb index 9fa6532..841d297 100644 --- a/Spectacle/Exploration_spectacle.ipynb +++ b/Spectacle/Exploration_spectacle.ipynb @@ -17,30 +17,805 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "37977b4e-42e7-4d8e-8b9a-6843292fd128", "metadata": {}, "outputs": [], "source": [ "# Import KPI construction functions\n", - "# exec(open('0_KPI_functions.py').read())\n", - "exec(open('../0_KPI_functions.py').read())\n" + "exec(open('0_KPI_functions.py').read())\n", + "# exec(open('../0_KPI_functions.py').read())\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "cca62d72-f809-41a9-bb06-1be7d6b09307", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", "\n", - "BUCKET = \"bdc2324-data\"\n", + "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n", "fs.ls(BUCKET)" ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0e1ce56c-2e50-456c-ba97-ed4a699cc8d4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_438/3710670046.py:6: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n", + " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n" + ] + } + ], + "source": [ + "BUCKET = \"projet-bdc2324-team1\"\n", + "FILE_KEY_S3 = \"0_Input/Company_10/products_purchased_reduced.csv\"\n", + "FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "637aa400-f49a-4d8d-802a-868b241f8a9d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n", + "for nom_base in dic_base:\n", + " FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n", + " with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "e60529b5-986f-4685-91e1-782c2b022e09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | id | \n", + "customer_id | \n", + "target_name | \n", + "target_type_is_import | \n", + "target_type_name | \n", + "
---|---|---|---|---|---|
0 | \n", + "1165098 | \n", + "618562 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
1 | \n", + "1165100 | \n", + "618559 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
2 | \n", + "1165101 | \n", + "618561 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
3 | \n", + "1165102 | \n", + "618560 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
4 | \n", + "1165103 | \n", + "618558 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
69253 | \n", + "1698158 | \n", + "18580 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69254 | \n", + "1698159 | \n", + "18569 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69255 | \n", + "1698160 | \n", + "2962 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69256 | \n", + "1698161 | \n", + "3825 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69257 | \n", + "1698162 | \n", + "5731 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69258 rows × 5 columns
\n", + "\n", + " | ticket_id | \n", + "customer_id | \n", + "purchase_id | \n", + "event_type_id | \n", + "supplier_name | \n", + "purchase_date | \n", + "amount | \n", + "is_full_price | \n", + "name_event_types | \n", + "name_facilities | \n", + "name_categories | \n", + "name_events | \n", + "name_seasons | \n", + "start_date_time | \n", + "end_date_time | \n", + "open | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1799177 | \n", + "36984 | \n", + "409613 | \n", + "2 | \n", + "guichet | \n", + "2016-04-28 15:58:26+00:00 | \n", + "9.0 | \n", + "False | \n", + "danse | \n", + "le grand t | \n", + "abo t gourmand jeune | \n", + "aringa rossa | \n", + "test 2016/2017 | \n", + "2016-09-27 00:00:00+02:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
1 | \n", + "1799178 | \n", + "36984 | \n", + "409613 | \n", + "3 | \n", + "guichet | \n", + "2016-04-28 15:58:26+00:00 | \n", + "9.0 | \n", + "False | \n", + "cirque | \n", + "le grand t | \n", + "abo t gourmand jeune | \n", + "5èmes hurlants | \n", + "test 2016/2017 | \n", + "2016-11-18 00:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
2 | \n", + "1799179 | \n", + "36984 | \n", + "409613 | \n", + "1 | \n", + "guichet | \n", + "2016-04-28 15:58:26+00:00 | \n", + "9.0 | \n", + "False | \n", + "théâtre | \n", + "le grand t | \n", + "abo t gourmand jeune | \n", + "dom juan | \n", + "test 2016/2017 | \n", + "2016-12-07 00:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
3 | \n", + "1799180 | \n", + "36984 | \n", + "409613 | \n", + "1 | \n", + "guichet | \n", + "2016-04-28 15:58:26+00:00 | \n", + "9.0 | \n", + "False | \n", + "théâtre | \n", + "le grand t | \n", + "abo t gourmand jeune | \n", + "vanishing point | \n", + "test 2016/2017 | \n", + "2017-01-04 00:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
4 | \n", + "1799181 | \n", + "36984 | \n", + "409613 | \n", + "3 | \n", + "guichet | \n", + "2016-04-28 15:58:26+00:00 | \n", + "12.0 | \n", + "False | \n", + "cirque | \n", + "la cite des congres | \n", + "abo t gourmand jeune | \n", + "a o lang pho | \n", + "test 2016/2017 | \n", + "2017-01-03 00:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
492309 | \n", + "3252232 | \n", + "621716 | \n", + "710062 | \n", + "1 | \n", + "guichet | \n", + "2023-03-09 11:08:45+00:00 | \n", + "7.0 | \n", + "False | \n", + "théâtre | \n", + "cap nort | \n", + "tarif sco co 1 seance scolaire | \n", + "sur moi, le temps | \n", + "2022/2023 | \n", + "2023-03-13 14:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
492310 | \n", + "3252233 | \n", + "621716 | \n", + "710062 | \n", + "1 | \n", + "guichet | \n", + "2023-03-09 11:08:45+00:00 | \n", + "7.0 | \n", + "False | \n", + "théâtre | \n", + "cap nort | \n", + "tarif sco co 1 seance scolaire | \n", + "sur moi, le temps | \n", + "2022/2023 | \n", + "2023-03-13 14:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
492311 | \n", + "3252234 | \n", + "621716 | \n", + "710062 | \n", + "1 | \n", + "guichet | \n", + "2023-03-09 11:08:45+00:00 | \n", + "7.0 | \n", + "False | \n", + "théâtre | \n", + "cap nort | \n", + "tarif sco co 1 seance scolaire | \n", + "sur moi, le temps | \n", + "2022/2023 | \n", + "2023-03-13 14:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
492312 | \n", + "3252235 | \n", + "621716 | \n", + "710062 | \n", + "1 | \n", + "guichet | \n", + "2023-03-09 11:08:45+00:00 | \n", + "7.0 | \n", + "False | \n", + "théâtre | \n", + "cap nort | \n", + "tarif sco co 1 seance scolaire | \n", + "sur moi, le temps | \n", + "2022/2023 | \n", + "2023-03-13 14:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
492313 | \n", + "3252236 | \n", + "621716 | \n", + "710062 | \n", + "1 | \n", + "guichet | \n", + "2023-03-09 11:08:45+00:00 | \n", + "7.0 | \n", + "False | \n", + "théâtre | \n", + "cap nort | \n", + "tarif sco co 1 seance scolaire | \n", + "sur moi, le temps | \n", + "2022/2023 | \n", + "2023-03-13 14:00:00+01:00 | \n", + "1901-01-01 00:09:21+00:09 | \n", + "True | \n", + "
492314 rows × 16 columns
\n", + "\n", + " | id | \n", + "customer_id | \n", + "target_name | \n", + "target_type_is_import | \n", + "target_type_name | \n", + "
---|---|---|---|---|---|
0 | \n", + "1165098 | \n", + "618562 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
1 | \n", + "1165100 | \n", + "618559 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
2 | \n", + "1165101 | \n", + "618561 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
3 | \n", + "1165102 | \n", + "618560 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
4 | \n", + "1165103 | \n", + "618558 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
69253 | \n", + "1698158 | \n", + "18580 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69254 | \n", + "1698159 | \n", + "18569 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69255 | \n", + "1698160 | \n", + "2962 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69256 | \n", + "1698161 | \n", + "3825 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69257 | \n", + "1698162 | \n", + "5731 | \n", + "Newsletter mensuelle | \n", + "False | \n", + "manual_static_filter | \n", + "
69258 rows × 5 columns
\n", + "