{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5ce2ffc5-66b6-4709-9e2c-7a50f49d1361", "metadata": {}, "outputs": [], "source": [ "# Imports: standard library first, then third-party.\n", "import os\n", "import re\n", "\n", "import pandas as pd\n", "import s3fs" ] }, { "cell_type": "code", "execution_count": 35, "id": "f579ff01-f009-4fb1-ba79-0cb3ce58ab7f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['bdc2324-data/1',\n", " 'bdc2324-data/10',\n", " 'bdc2324-data/101',\n", " 'bdc2324-data/11',\n", " 'bdc2324-data/12',\n", " 'bdc2324-data/13',\n", " 'bdc2324-data/14',\n", " 'bdc2324-data/2',\n", " 'bdc2324-data/3',\n", " 'bdc2324-data/4',\n", " 'bdc2324-data/5',\n", " 'bdc2324-data/6',\n", " 'bdc2324-data/7',\n", " 'bdc2324-data/8',\n", " 'bdc2324-data/9']" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The endpoint host comes from the environment; a missing AWS_S3_ENDPOINT raises KeyError.\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "\n", "fs = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": S3_ENDPOINT_URL})\n", "BUCKET = \"bdc2324-data\"\n", "# Show the top-level entries of the bucket.\n", "fs.ls(BUCKET)" ] }, { "cell_type": "code", "execution_count": 27, "id": "c8b2c797-271f-43ee-8823-d0aee5b8782d", "metadata": {}, "outputs": [], "source": [ "# Take the second entry of the bucket listing and list what it contains.\n", "FILE_PATH_S3 = fs.ls(BUCKET)[1]\n", "files_path_2 = fs.ls(FILE_PATH_S3)" ] }, { "cell_type": "code", "execution_count": 4, "id": "18cee687-1462-4169-9bfe-f39786135cdd", "metadata": {}, "outputs": [], "source": [ "# NOTE(review): this cell used to read `files_path_1`, which no cell defines, so a\n", "# fresh Restart and Run All raised NameError. It now reads from the `files_path_2`\n", "# listing built above - confirm that directory is the intended source of the file.\n", "with fs.open(files_path_2[1], mode=\"rb\") as file_in:\n", "    df_campaigns = pd.read_csv(file_in)" ] }, { "cell_type": "code", "execution_count": 5, "id": "33e8d14c-c649-4b9c-8290-4a2aa635f999", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "name | \n", "service_id | \n", "created_at | \n", "updated_at | \n", "process_id | \n", "report_url | \n", "category | \n", "to_be_synced | \n", "identifier | \n", "sent_at | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1319613 | \n", "newsletter enseignants janvier 2022 | \n", "721 | \n", "2022-01-14 16:06:42.586321+01:00 | \n", "2022-02-03 14:17:27.112963+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "aba3b6fd5d186d28e06ff97135cade7f | \n", "2022-01-14 00:00:00+01:00 | \n", "
1 | \n", "1319586 | \n", "lsf_janvier_2022 | \n", "717 | \n", "2022-01-07 11:30:35.315895+01:00 | \n", "2022-02-03 14:17:27.116171+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "788d986905533aba051261497ecffcbb | \n", "2022-01-07 00:00:00+01:00 | \n", "
2 | \n", "1319282 | \n", "Invitation à déjeuner au Mucem | Vernissage « ... | \n", "591 | \n", "2021-09-28 12:50:24.448752+02:00 | \n", "2022-02-03 14:17:27.119582+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "3493894fa4ea036cfc6433c3e2ee63b0 | \n", "2021-09-28 00:00:00+02:00 | \n", "
3 | \n", "1319283 | \n", "Vacances de la Toussaint - centres des loisirs | \n", "590 | \n", "2021-09-28 18:01:04.692073+02:00 | \n", "2022-02-03 14:17:27.124408+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "08b255a5d42b89b0585260b6f2360bdd | \n", "2021-09-28 00:00:00+02:00 | \n", "
4 | \n", "1319636 | \n", "ddcp_promo_md_livemag | \n", "730 | \n", "2022-01-27 18:00:41.053069+01:00 | \n", "2022-02-03 14:17:27.127607+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "d5cfead94f5350c12c322b5b664544c1 | \n", "2022-01-27 00:00:00+01:00 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
952 | \n", "1320072 | \n", "dre_gaza0106 | \n", "881 | \n", "2022-05-26 09:01:35.523639+02:00 | \n", "2022-12-02 17:51:22.614046+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "7504adad8bb96320eb3afdd4df6e1f60 | \n", "2022-05-26 00:00:00+02:00 | \n", "
953 | \n", "661398 | \n", "DDCP Plan Bis 4 - Marketing direct - MJ5C | \n", "183 | \n", "2021-06-18 10:30:01.259578+02:00 | \n", "2021-09-24 11:56:09.082785+02:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "cedebb6e872f539bef8c3f919874e9d7 | \n", "2020-07-27 00:00:00+02:00 | \n", "
954 | \n", "1320487 | \n", "Invitation portes ouvertes amitiés | \n", "988 | \n", "2022-09-29 18:01:33.834090+02:00 | \n", "2022-12-02 17:51:23.258324+01:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "9908279ebbf1f9b250ba689db6a0222b | \n", "2022-09-29 00:00:00+02:00 | \n", "
955 | \n", "906903 | \n", "DDCP PROMO La méditerranée des philosophes #3 ... | \n", "310 | \n", "2021-07-19 14:07:16.177390+02:00 | \n", "2021-09-24 11:56:09.086101+02:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "06eb61b839a0cefee4967c67ccb099dc | \n", "2020-12-23 00:00:00+01:00 | \n", "
956 | \n", "579313 | \n", "ddcp_promo_automation_manuel_pre_visit | \n", "481 | \n", "2021-06-08 17:38:54.041310+02:00 | \n", "2021-09-24 11:56:09.089394+02:00 | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "False | \n", "9461cce28ebe3e76fb4b931c35a169b0 | \n", "2021-06-08 00:00:00+02:00 | \n", "
957 rows × 11 columns
\n", "\n", " | id | \n", "number | \n", "created_at | \n", "updated_at | \n", "purchase_id | \n", "product_id | \n", "is_from_subscription | \n", "type_of | \n", "supplier_id | \n", "barcode | \n", "identifier | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1799177 | \n", "60_0_0_0_1_k-5 | \n", "2021-12-29 07:27:27.868513+01:00 | \n", "2021-12-29 07:27:27.868513+01:00 | \n", "409613 | \n", "321683 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "56c3db5a02c87af7e525676092cb7c4a | \n", "
1 | \n", "1799178 | \n", "71_0_0_0_1_k-5 | \n", "2021-12-29 07:27:27.976380+01:00 | \n", "2021-12-29 07:27:27.976380+01:00 | \n", "409613 | \n", "321684 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "1ecad1dc6b42b4cdb75784dd9dcd9d5c | \n", "
2 | \n", "1799179 | \n", "93_0_0_0_1_k-5 | \n", "2021-12-29 07:27:27.978719+01:00 | \n", "2021-12-29 07:27:27.978719+01:00 | \n", "409613 | \n", "321685 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "b3d207bdb47bcdb27a52f6bae0db7ec2 | \n", "
3 | \n", "1799180 | \n", "103_0_0_0_1_k-5 | \n", "2021-12-29 07:27:27.984621+01:00 | \n", "2021-12-29 07:27:27.984621+01:00 | \n", "409613 | \n", "321686 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "10df9591b617cc177516e9ddf91ddae3 | \n", "
4 | \n", "1799181 | \n", "107_0_3_2_1_h-1 | \n", "2021-12-29 07:27:27.988602+01:00 | \n", "2021-12-29 07:27:27.988602+01:00 | \n", "409613 | \n", "321687 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "3a8c7d5882fe9f20f0f59c8d90c9873c | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
492309 | \n", "3252232 | \n", "336359 | \n", "2023-03-10 01:31:52.543375+01:00 | \n", "2023-03-10 01:31:52.543375+01:00 | \n", "710062 | \n", "572547 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "fc96f582931209501ed186d709664980 | \n", "
492310 | \n", "3252233 | \n", "336360 | \n", "2023-03-10 01:31:52.543869+01:00 | \n", "2023-03-10 01:31:52.543869+01:00 | \n", "710062 | \n", "572547 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "d4ccfb00a9b22b62654bbf98b4d9a5a5 | \n", "
492311 | \n", "3252234 | \n", "336361 | \n", "2023-03-10 01:31:52.545783+01:00 | \n", "2023-03-10 01:31:52.545783+01:00 | \n", "710062 | \n", "572547 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "d5f76662d6571b8eaceaf19c781fa514 | \n", "
492312 | \n", "3252235 | \n", "336362 | \n", "2023-03-10 01:31:52.547043+01:00 | \n", "2023-03-10 01:31:52.547043+01:00 | \n", "710062 | \n", "572547 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "093225db5cd5e06cc8e06242b4cbba37 | \n", "
492313 | \n", "3252236 | \n", "336363 | \n", "2023-03-10 01:31:52.548311+01:00 | \n", "2023-03-10 01:31:52.548311+01:00 | \n", "710062 | \n", "572547 | \n", "False | \n", "1 | \n", "2 | \n", "NaN | \n", "9bace0d0cd7a5ec559aca8ac8bf67700 | \n", "
492314 rows × 11 columns
\n", "\n", " | id | \n", "lastname | \n", "firstname | \n", "birthdate | \n", "street_id | \n", "created_at | \n", "updated_at | \n", "civility | \n", "is_partner | \n", "... | \n", "preferred_category | \n", "preferred_supplier | \n", "preferred_formula | \n", "purchase_count | \n", "first_buying_date | \n", "last_visiting_date | \n", "zipcode | \n", "country | \n", "age | \n", "tenant_id | \n", "|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "821538 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email821538 | \n", "139 | \n", "2023-07-14 11:43:34.261637+02:00 | \n", "2023-07-14 11:43:34.261637+02:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "875 | \n", "
1 | \n", "809126 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email809126 | \n", "1063 | \n", "2023-05-04 17:17:24.456829+02:00 | \n", "2023-05-04 17:17:24.456829+02:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "fr | \n", "NaN | \n", "875 | \n", "
2 | \n", "11005 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1063 | \n", "2017-07-06 03:01:57.242998+02:00 | \n", "2018-11-12 18:01:18.283492+01:00 | \n", "NaN | \n", "False | \n", "... | \n", "zone tarif 1 | \n", "NaN | \n", "invite rp | \n", "14 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "fr | \n", "NaN | \n", "875 | \n", "
3 | \n", "17663 | \n", "lastname17663 | \n", "firstname17663 | \n", "NaN | \n", "NaN | \n", "12731 | \n", "2018-09-23 02:39:17.778100+02:00 | \n", "2018-09-23 02:39:17.778100+02:00 | \n", "NaN | \n", "False | \n", "... | \n", "zone tarif 1 | \n", "NaN | \n", "detaxe | \n", "1 | \n", "NaN | \n", "NaN | \n", "44220 | \n", "fr | \n", "NaN | \n", "875 | \n", "
4 | \n", "38100 | \n", "lastname38100 | \n", "firstname38100 | \n", "NaN | \n", "NaN | \n", "12395 | \n", "2019-02-11 11:05:58.581121+01:00 | \n", "2022-12-06 23:15:33.485866+01:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "NaN | \n", "NaN | \n", "44100 | \n", "fr | \n", "NaN | \n", "875 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
98789 | \n", "766266 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email766266 | \n", "139 | \n", "2022-12-06 18:26:04.142337+01:00 | \n", "2023-05-03 18:01:01.799141+02:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "875 | \n", "
98790 | \n", "766336 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email766336 | \n", "139 | \n", "2022-12-06 18:28:49.139502+01:00 | \n", "2022-12-06 23:15:33.485866+01:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "875 | \n", "
98791 | \n", "766348 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email766348 | \n", "139 | \n", "2022-12-06 18:28:51.140745+01:00 | \n", "2022-12-06 23:15:33.485866+01:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "875 | \n", "
98792 | \n", "766363 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email766363 | \n", "139 | \n", "2022-12-06 18:29:44.081056+01:00 | \n", "2022-12-06 23:15:33.485866+01:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "875 | \n", "
98793 | \n", "766366 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "email766366 | \n", "139 | \n", "2022-12-06 18:29:44.934174+01:00 | \n", "2022-12-06 23:15:33.485866+01:00 | \n", "NaN | \n", "False | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "875 | \n", "
98794 rows × 43 columns
\n", "