diff --git a/Notebook_AJ.ipynb b/Notebook_AJ.ipynb index 3338b82..2708e06 100644 --- a/Notebook_AJ.ipynb +++ b/Notebook_AJ.ipynb @@ -11,38 +11,54 @@ { "cell_type": "code", "execution_count": 1, + "id": "88af2795-8bf9-4df0-a059-be7c28fb4289", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "e05cd2c9-3f76-48e3-b4a6-5055445af2e4", + "metadata": {}, + "source": [ + "Configuration de l'accès aux données" + ] + }, + { + "cell_type": "code", + "execution_count": 2, "id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "['bdc2324-data/1',\n", - " 'bdc2324-data/10',\n", - " 'bdc2324-data/101',\n", - " 'bdc2324-data/11',\n", - " 'bdc2324-data/12',\n", - " 'bdc2324-data/13',\n", - " 'bdc2324-data/14',\n", - " 'bdc2324-data/2',\n", - " 'bdc2324-data/3',\n", - " 'bdc2324-data/4',\n", - " 'bdc2324-data/5',\n", - " 'bdc2324-data/6',\n", - " 'bdc2324-data/7',\n", - " 'bdc2324-data/8',\n", - " 'bdc2324-data/9']" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" + "ename": "PermissionError", + "evalue": "The Access Key Id you provided does not exist in our records.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:394\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items)\u001b[0m\n\u001b[1;32m 393\u001b[0m dircache \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 394\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m it:\n\u001b[1;32m 395\u001b[0m 
dircache\u001b[38;5;241m.\u001b[39mextend(i\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCommonPrefixes\u001b[39m\u001b[38;5;124m'\u001b[39m, []))\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/paginate.py:269\u001b[0m, in \u001b[0;36mPageIterator.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 269\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcurrent_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 270\u001b[0m parsed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_parsed_response(response)\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/paginate.py:357\u001b[0m, in \u001b[0;36mPageIterator._make_request\u001b[0;34m(self, current_kwargs)\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_make_request\u001b[39m(\u001b[38;5;28mself\u001b[39m, current_kwargs):\n\u001b[0;32m--> 357\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcurrent_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the ListObjectsV2 operation: The Access Key Id you provided does not exist in our records.", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m fs \u001b[38;5;241m=\u001b[39m s3fs\u001b[38;5;241m.\u001b[39mS3FileSystem(client_kwargs\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mendpoint_url\u001b[39m\u001b[38;5;124m'\u001b[39m: S3_ENDPOINT_URL})\n\u001b[1;32m 8\u001b[0m BUCKET \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbdc2324-data\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 9\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBUCKET\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:619\u001b[0m, in \u001b[0;36mS3FileSystem.ls\u001b[0;34m(self, path, detail, refresh, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\" List single \"directory\" with or without details\u001b[39;00m\n\u001b[1;32m 605\u001b[0m \n\u001b[1;32m 606\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;124;03m additional arguments passed on\u001b[39;00m\n\u001b[1;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 618\u001b[0m path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_strip_protocol(path)\u001b[38;5;241m.\u001b[39mrstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 619\u001b[0m files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrefresh\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrefresh\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m files:\n\u001b[1;32m 621\u001b[0m files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ls(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent(path), refresh\u001b[38;5;241m=\u001b[39mrefresh)\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:487\u001b[0m, in \u001b[0;36mS3FileSystem._ls\u001b[0;34m(self, path, refresh)\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsbuckets(refresh)\n\u001b[1;32m 486\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 487\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_lsdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mrefresh\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:409\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items)\u001b[0m\n\u001b[1;32m 407\u001b[0m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 409\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdircache[path] \u001b[38;5;241m=\u001b[39m files\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m files\n", + "\u001b[0;31mPermissionError\u001b[0m: The Access Key Id you provided does not exist in our records." + ] } ], "source": [ "import os\n", "import s3fs\n", - "\n", "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", @@ -50,6 +66,109 @@ "BUCKET = \"bdc2324-data\"\n", "fs.ls(BUCKET)" ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d60f6b27-00b4-4655-9325-79169d1e68df", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bdc2324-data/1\n", + "['bdc2324-data/1/1campaign_stats.csv', 'bdc2324-data/1/1campaigns.csv', 'bdc2324-data/1/1categories.csv', 'bdc2324-data/1/1countries.csv', 'bdc2324-data/1/1currencies.csv', 'bdc2324-data/1/1customer_target_mappings.csv', 'bdc2324-data/1/1customersplus.csv', 'bdc2324-data/1/1event_types.csv', 'bdc2324-data/1/1events.csv', 'bdc2324-data/1/1facilities.csv', 'bdc2324-data/1/1link_stats.csv', 'bdc2324-data/1/1pricing_formulas.csv', 'bdc2324-data/1/1product_packs.csv', 
'bdc2324-data/1/1products.csv', 'bdc2324-data/1/1products_groups.csv', 'bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1representation_category_capacities.csv', 'bdc2324-data/1/1representations.csv', 'bdc2324-data/1/1seasons.csv', 'bdc2324-data/1/1structure_tag_mappings.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tags.csv', 'bdc2324-data/1/1target_types.csv', 'bdc2324-data/1/1targets.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_of_categories.csv', 'bdc2324-data/1/1type_of_pricing_formulas.csv', 'bdc2324-data/1/1type_ofs.csv']\n", + "bdc2324-data/2\n", + "['bdc2324-data/2/2campaign_stats.csv', 'bdc2324-data/2/2campaigns.csv', 'bdc2324-data/2/2categories.csv', 'bdc2324-data/2/2contribution_sites.csv', 'bdc2324-data/2/2contributions.csv', 'bdc2324-data/2/2countries.csv', 'bdc2324-data/2/2currencies.csv', 'bdc2324-data/2/2customer_target_mappings.csv', 'bdc2324-data/2/2customersplus.csv', 'bdc2324-data/2/2event_types.csv', 'bdc2324-data/2/2events.csv', 'bdc2324-data/2/2facilities.csv', 'bdc2324-data/2/2link_stats.csv', 'bdc2324-data/2/2pricing_formulas.csv', 'bdc2324-data/2/2product_packs.csv', 'bdc2324-data/2/2products.csv', 'bdc2324-data/2/2products_groups.csv', 'bdc2324-data/2/2purchases.csv', 'bdc2324-data/2/2representation_category_capacities.csv', 'bdc2324-data/2/2representations.csv', 'bdc2324-data/2/2seasons.csv', 'bdc2324-data/2/2structure_tag_mappings.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/2/2tags.csv', 'bdc2324-data/2/2target_types.csv', 'bdc2324-data/2/2targets.csv', 'bdc2324-data/2/2tickets.csv']\n", + "bdc2324-data/3\n", + "['bdc2324-data/3/3campaign_stats.csv', 'bdc2324-data/3/3campaigns.csv', 'bdc2324-data/3/3categories.csv', 'bdc2324-data/3/3consumptions.csv', 'bdc2324-data/3/3contribution_sites.csv', 'bdc2324-data/3/3contributions.csv', 'bdc2324-data/3/3countries.csv', 'bdc2324-data/3/3currencies.csv', 'bdc2324-data/3/3customer_target_mappings.csv', 'bdc2324-data/3/3customersplus.csv', 
'bdc2324-data/3/3event_types.csv', 'bdc2324-data/3/3events.csv', 'bdc2324-data/3/3facilities.csv', 'bdc2324-data/3/3link_stats.csv', 'bdc2324-data/3/3pricing_formulas.csv', 'bdc2324-data/3/3product_packs.csv', 'bdc2324-data/3/3products.csv', 'bdc2324-data/3/3products_groups.csv', 'bdc2324-data/3/3purchases.csv', 'bdc2324-data/3/3representation_category_capacities.csv', 'bdc2324-data/3/3representations.csv', 'bdc2324-data/3/3seasons.csv', 'bdc2324-data/3/3structure_tag_mappings.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/3/3tags.csv', 'bdc2324-data/3/3target_types.csv', 'bdc2324-data/3/3targets.csv', 'bdc2324-data/3/3tickets.csv']\n", + "bdc2324-data/4\n", + "['bdc2324-data/4/4campaign_stats.csv', 'bdc2324-data/4/4campaigns.csv', 'bdc2324-data/4/4categories.csv', 'bdc2324-data/4/4contribution_sites.csv', 'bdc2324-data/4/4contributions.csv', 'bdc2324-data/4/4countries.csv', 'bdc2324-data/4/4currencies.csv', 'bdc2324-data/4/4customer_target_mappings.csv', 'bdc2324-data/4/4customersplus.csv', 'bdc2324-data/4/4event_types.csv', 'bdc2324-data/4/4events.csv', 'bdc2324-data/4/4facilities.csv', 'bdc2324-data/4/4link_stats.csv', 'bdc2324-data/4/4pricing_formulas.csv', 'bdc2324-data/4/4product_packs.csv', 'bdc2324-data/4/4products.csv', 'bdc2324-data/4/4products_groups.csv', 'bdc2324-data/4/4purchases.csv', 'bdc2324-data/4/4representation_category_capacities.csv', 'bdc2324-data/4/4representations.csv', 'bdc2324-data/4/4seasons.csv', 'bdc2324-data/4/4structure_tag_mappings.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/4/4tags.csv', 'bdc2324-data/4/4target_types.csv', 'bdc2324-data/4/4targets.csv', 'bdc2324-data/4/4tickets.csv', 'bdc2324-data/4/4type_of_pricing_formulas.csv', 'bdc2324-data/4/4type_ofs.csv']\n", + "bdc2324-data/5\n", + "['bdc2324-data/5/5campaign_stats.csv', 'bdc2324-data/5/5campaigns.csv', 'bdc2324-data/5/5categories.csv', 'bdc2324-data/5/5consumptions.csv', 'bdc2324-data/5/5countries.csv', 'bdc2324-data/5/5currencies.csv', 
'bdc2324-data/5/5customer_target_mappings.csv', 'bdc2324-data/5/5customersplus.csv', 'bdc2324-data/5/5event_types.csv', 'bdc2324-data/5/5events.csv', 'bdc2324-data/5/5facilities.csv', 'bdc2324-data/5/5link_stats.csv', 'bdc2324-data/5/5pricing_formulas.csv', 'bdc2324-data/5/5product_packs.csv', 'bdc2324-data/5/5products.csv', 'bdc2324-data/5/5products_groups.csv', 'bdc2324-data/5/5purchases.csv', 'bdc2324-data/5/5representation_category_capacities.csv', 'bdc2324-data/5/5representations.csv', 'bdc2324-data/5/5seasons.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/5/5target_types.csv', 'bdc2324-data/5/5targets.csv', 'bdc2324-data/5/5tickets.csv']\n", + "bdc2324-data/6\n", + "['bdc2324-data/6/6campaign_stats.csv', 'bdc2324-data/6/6campaigns.csv', 'bdc2324-data/6/6categories.csv', 'bdc2324-data/6/6consumptions.csv', 'bdc2324-data/6/6countries.csv', 'bdc2324-data/6/6currencies.csv', 'bdc2324-data/6/6customer_target_mappings.csv', 'bdc2324-data/6/6customersplus.csv', 'bdc2324-data/6/6event_types.csv', 'bdc2324-data/6/6events.csv', 'bdc2324-data/6/6facilities.csv', 'bdc2324-data/6/6link_stats.csv', 'bdc2324-data/6/6pricing_formulas.csv', 'bdc2324-data/6/6product_packs.csv', 'bdc2324-data/6/6products.csv', 'bdc2324-data/6/6products_groups.csv', 'bdc2324-data/6/6purchases.csv', 'bdc2324-data/6/6representation_category_capacities.csv', 'bdc2324-data/6/6representations.csv', 'bdc2324-data/6/6seasons.csv', 'bdc2324-data/6/6structure_tag_mappings.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/6/6tags.csv', 'bdc2324-data/6/6target_types.csv', 'bdc2324-data/6/6targets.csv', 'bdc2324-data/6/6tickets.csv', 'bdc2324-data/6/6type_of_pricing_formulas.csv', 'bdc2324-data/6/6type_ofs.csv']\n", + "bdc2324-data/7\n", + "['bdc2324-data/7/7campaign_stats.csv', 'bdc2324-data/7/7campaigns.csv', 'bdc2324-data/7/7categories.csv', 'bdc2324-data/7/7consumptions.csv', 'bdc2324-data/7/7countries.csv', 'bdc2324-data/7/7currencies.csv', 'bdc2324-data/7/7customer_target_mappings.csv', 
'bdc2324-data/7/7customersplus.csv', 'bdc2324-data/7/7event_types.csv', 'bdc2324-data/7/7events.csv', 'bdc2324-data/7/7facilities.csv', 'bdc2324-data/7/7link_stats.csv', 'bdc2324-data/7/7pricing_formulas.csv', 'bdc2324-data/7/7product_packs.csv', 'bdc2324-data/7/7products.csv', 'bdc2324-data/7/7products_groups.csv', 'bdc2324-data/7/7purchases.csv', 'bdc2324-data/7/7representation_category_capacities.csv', 'bdc2324-data/7/7representation_types.csv', 'bdc2324-data/7/7representations.csv', 'bdc2324-data/7/7seasons.csv', 'bdc2324-data/7/7structure_tag_mappings.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/7/7tags.csv', 'bdc2324-data/7/7target_types.csv', 'bdc2324-data/7/7targets.csv', 'bdc2324-data/7/7tickets.csv', 'bdc2324-data/7/7type_of_categories.csv', 'bdc2324-data/7/7type_of_pricing_formulas.csv', 'bdc2324-data/7/7type_ofs.csv']\n", + "bdc2324-data/8\n", + "['bdc2324-data/8/8campaign_stats.csv', 'bdc2324-data/8/8campaigns.csv', 'bdc2324-data/8/8categories.csv', 'bdc2324-data/8/8countries.csv', 'bdc2324-data/8/8currencies.csv', 'bdc2324-data/8/8customer_target_mappings.csv', 'bdc2324-data/8/8customersplus.csv', 'bdc2324-data/8/8event_types.csv', 'bdc2324-data/8/8events.csv', 'bdc2324-data/8/8facilities.csv', 'bdc2324-data/8/8link_stats.csv', 'bdc2324-data/8/8pricing_formulas.csv', 'bdc2324-data/8/8product_packs.csv', 'bdc2324-data/8/8products.csv', 'bdc2324-data/8/8products_groups.csv', 'bdc2324-data/8/8purchases.csv', 'bdc2324-data/8/8representation_category_capacities.csv', 'bdc2324-data/8/8representations.csv', 'bdc2324-data/8/8seasons.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/8/8target_types.csv', 'bdc2324-data/8/8targets.csv', 'bdc2324-data/8/8tickets.csv', 'bdc2324-data/8/8type_of_categories.csv', 'bdc2324-data/8/8type_of_pricing_formulas.csv', 'bdc2324-data/8/8type_ofs.csv']\n", + "bdc2324-data/9\n", + "['bdc2324-data/9/9campaign_stats.csv', 'bdc2324-data/9/9campaigns.csv', 'bdc2324-data/9/9categories.csv', 'bdc2324-data/9/9countries.csv', 
'bdc2324-data/9/9currencies.csv', 'bdc2324-data/9/9customer_target_mappings.csv', 'bdc2324-data/9/9customersplus.csv', 'bdc2324-data/9/9event_types.csv', 'bdc2324-data/9/9events.csv', 'bdc2324-data/9/9facilities.csv', 'bdc2324-data/9/9link_stats.csv', 'bdc2324-data/9/9pricing_formulas.csv', 'bdc2324-data/9/9product_packs.csv', 'bdc2324-data/9/9products.csv', 'bdc2324-data/9/9products_groups.csv', 'bdc2324-data/9/9purchases.csv', 'bdc2324-data/9/9representation_category_capacities.csv', 'bdc2324-data/9/9representations.csv', 'bdc2324-data/9/9seasons.csv', 'bdc2324-data/9/9suppliers.csv', 'bdc2324-data/9/9target_types.csv', 'bdc2324-data/9/9targets.csv', 'bdc2324-data/9/9tickets.csv']\n", + "bdc2324-data/10\n", + "['bdc2324-data/10/10campaign_stats.csv', 'bdc2324-data/10/10campaigns.csv', 'bdc2324-data/10/10categories.csv', 'bdc2324-data/10/10countries.csv', 'bdc2324-data/10/10currencies.csv', 'bdc2324-data/10/10customer_target_mappings.csv', 'bdc2324-data/10/10customersplus.csv', 'bdc2324-data/10/10event_types.csv', 'bdc2324-data/10/10events.csv', 'bdc2324-data/10/10facilities.csv', 'bdc2324-data/10/10link_stats.csv', 'bdc2324-data/10/10pricing_formulas.csv', 'bdc2324-data/10/10product_packs.csv', 'bdc2324-data/10/10products.csv', 'bdc2324-data/10/10products_groups.csv', 'bdc2324-data/10/10purchases.csv', 'bdc2324-data/10/10representation_category_capacities.csv', 'bdc2324-data/10/10representation_types.csv', 'bdc2324-data/10/10representations.csv', 'bdc2324-data/10/10seasons.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/10/10tags.csv', 'bdc2324-data/10/10target_types.csv', 'bdc2324-data/10/10targets.csv', 'bdc2324-data/10/10tickets.csv', 'bdc2324-data/10/10type_of_pricing_formulas.csv', 'bdc2324-data/10/10type_ofs.csv']\n", + "bdc2324-data/11\n", + "['bdc2324-data/11/11campaign_stats.csv', 'bdc2324-data/11/11campaigns.csv', 'bdc2324-data/11/11categories.csv', 'bdc2324-data/11/11countries.csv', 'bdc2324-data/11/11currencies.csv', 
'bdc2324-data/11/11customer_target_mappings.csv', 'bdc2324-data/11/11customersplus.csv', 'bdc2324-data/11/11event_types.csv', 'bdc2324-data/11/11events.csv', 'bdc2324-data/11/11facilities.csv', 'bdc2324-data/11/11link_stats.csv', 'bdc2324-data/11/11pricing_formulas.csv', 'bdc2324-data/11/11product_packs.csv', 'bdc2324-data/11/11products.csv', 'bdc2324-data/11/11products_groups.csv', 'bdc2324-data/11/11purchases.csv', 'bdc2324-data/11/11representation_category_capacities.csv', 'bdc2324-data/11/11representations.csv', 'bdc2324-data/11/11seasons.csv', 'bdc2324-data/11/11structure_tag_mappings.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/11/11tags.csv', 'bdc2324-data/11/11target_types.csv', 'bdc2324-data/11/11targets.csv', 'bdc2324-data/11/11tickets.csv']\n", + "bdc2324-data/12\n", + "['bdc2324-data/12/12campaign_stats.csv', 'bdc2324-data/12/12campaigns.csv', 'bdc2324-data/12/12categories.csv', 'bdc2324-data/12/12consumptions.csv', 'bdc2324-data/12/12countries.csv', 'bdc2324-data/12/12currencies.csv', 'bdc2324-data/12/12customer_target_mappings.csv', 'bdc2324-data/12/12customersplus.csv', 'bdc2324-data/12/12event_types.csv', 'bdc2324-data/12/12events.csv', 'bdc2324-data/12/12facilities.csv', 'bdc2324-data/12/12link_stats.csv', 'bdc2324-data/12/12pricing_formulas.csv', 'bdc2324-data/12/12product_packs.csv', 'bdc2324-data/12/12products.csv', 'bdc2324-data/12/12products_groups.csv', 'bdc2324-data/12/12purchases.csv', 'bdc2324-data/12/12representation_category_capacities.csv', 'bdc2324-data/12/12representations.csv', 'bdc2324-data/12/12seasons.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/12/12target_types.csv', 'bdc2324-data/12/12targets.csv', 'bdc2324-data/12/12tickets.csv', 'bdc2324-data/12/12type_ofs.csv']\n", + "bdc2324-data/13\n", + "['bdc2324-data/13/13campaign_stats.csv', 'bdc2324-data/13/13campaigns.csv', 'bdc2324-data/13/13categories.csv', 'bdc2324-data/13/13countries.csv', 'bdc2324-data/13/13currencies.csv', 
'bdc2324-data/13/13customer_target_mappings.csv', 'bdc2324-data/13/13customersplus.csv', 'bdc2324-data/13/13event_types.csv', 'bdc2324-data/13/13events.csv', 'bdc2324-data/13/13facilities.csv', 'bdc2324-data/13/13link_stats.csv', 'bdc2324-data/13/13pricing_formulas.csv', 'bdc2324-data/13/13product_packs.csv', 'bdc2324-data/13/13products.csv', 'bdc2324-data/13/13products_groups.csv', 'bdc2324-data/13/13purchases.csv', 'bdc2324-data/13/13representation_category_capacities.csv', 'bdc2324-data/13/13representation_types.csv', 'bdc2324-data/13/13representations.csv', 'bdc2324-data/13/13seasons.csv', 'bdc2324-data/13/13structure_tag_mappings.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/13/13tags.csv', 'bdc2324-data/13/13target_types.csv', 'bdc2324-data/13/13targets.csv', 'bdc2324-data/13/13tickets.csv']\n", + "bdc2324-data/14\n", + "['bdc2324-data/14/14campaign_stats.csv', 'bdc2324-data/14/14campaigns.csv', 'bdc2324-data/14/14categories.csv', 'bdc2324-data/14/14countries.csv', 'bdc2324-data/14/14currencies.csv', 'bdc2324-data/14/14customer_target_mappings.csv', 'bdc2324-data/14/14customersplus.csv', 'bdc2324-data/14/14event_types.csv', 'bdc2324-data/14/14events.csv', 'bdc2324-data/14/14facilities.csv', 'bdc2324-data/14/14link_stats.csv', 'bdc2324-data/14/14pricing_formulas.csv', 'bdc2324-data/14/14product_packs.csv', 'bdc2324-data/14/14products.csv', 'bdc2324-data/14/14products_groups.csv', 'bdc2324-data/14/14purchases.csv', 'bdc2324-data/14/14representation_category_capacities.csv', 'bdc2324-data/14/14representation_types.csv', 'bdc2324-data/14/14representations.csv', 'bdc2324-data/14/14seasons.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/14/14target_types.csv', 'bdc2324-data/14/14targets.csv', 'bdc2324-data/14/14tickets.csv', 'bdc2324-data/14/14type_of_categories.csv', 'bdc2324-data/14/14type_of_pricing_formulas.csv', 'bdc2324-data/14/14type_ofs.csv']\n", + "bdc2324-data/101\n", + "['bdc2324-data/101/101campaign_stats.csv', 
'bdc2324-data/101/101campaigns.csv', 'bdc2324-data/101/101categories.csv', 'bdc2324-data/101/101contribution_sites.csv', 'bdc2324-data/101/101contributions.csv', 'bdc2324-data/101/101countries.csv', 'bdc2324-data/101/101currencies.csv', 'bdc2324-data/101/101customer_target_mappings.csv', 'bdc2324-data/101/101customersplus.csv', 'bdc2324-data/101/101event_types.csv', 'bdc2324-data/101/101events.csv', 'bdc2324-data/101/101facilities.csv', 'bdc2324-data/101/101link_stats.csv', 'bdc2324-data/101/101pricing_formulas.csv', 'bdc2324-data/101/101product_packs.csv', 'bdc2324-data/101/101products.csv', 'bdc2324-data/101/101products_groups.csv', 'bdc2324-data/101/101purchases.csv', 'bdc2324-data/101/101representation_category_capacities.csv', 'bdc2324-data/101/101representations.csv', 'bdc2324-data/101/101seasons.csv', 'bdc2324-data/101/101structure_tag_mappings.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/101/101tags.csv', 'bdc2324-data/101/101target_types.csv', 'bdc2324-data/101/101targets.csv', 'bdc2324-data/101/101tickets.csv', 'bdc2324-data/101/101tickets_1.csv', 'bdc2324-data/101/101type_of_pricing_formulas.csv', 'bdc2324-data/101/101type_ofs.csv']\n" + ] + } + ], + "source": [ + "# Liste des jeux de données par dossier\n", + "for i in range(1, 15):\n", + " FILE_PATH_S3 = BUCKET + \"/\" + str(i)\n", + " print(FILE_PATH_S3)\n", + " print(fs.ls(FILE_PATH_S3))\n", + "print(BUCKET + \"/101\")\n", + "print(fs.ls(BUCKET + \"/101\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ba9d04ad-6cc1-4bac-b1a0-44bedfb09763", + "metadata": {}, + "outputs": [ + { + "ename": "PermissionError", + "evalue": "The Access Key Id you provided does not exist in our records.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", + "File 
\u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:1333\u001b[0m, in \u001b[0;36m_fetch_range\u001b[0;34m(client, bucket, key, version_id, start, end, max_attempts, req_kw)\u001b[0m\n\u001b[1;32m 1332\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1333\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1334\u001b[0m \u001b[43m \u001b[49m\u001b[43mRange\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbytes=\u001b[39;49m\u001b[38;5;132;43;01m%i\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m%i\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m%\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1335\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mversion_id_kw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1336\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mreq_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mBody\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mread()\n", + "File 
\u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the GetObject operation: The Access Key Id you provided does not exist in our records.", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m FILE_PATH_S3 \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbdc2324-data/1/1campaign_stats.csv\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mopen(FILE_PATH_S3, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m 
file_in:\n\u001b[0;32m----> 4\u001b[0m df_bpe \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_in\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m;\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:948\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 935\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 936\u001b[0m dialect,\n\u001b[1;32m 937\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 944\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 945\u001b[0m )\n\u001b[1;32m 946\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 948\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:611\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 
608\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 610\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 611\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 613\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 614\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1448\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1445\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1447\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1448\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1723\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1720\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 1722\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1723\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1724\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 1725\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:93\u001b[0m, in \u001b[0;36mCParserWrapper.__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype_backend\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 91\u001b[0m \u001b[38;5;66;03m# Fail here loudly instead of in cython after reading\u001b[39;00m\n\u001b[1;32m 92\u001b[0m import_optional_dependency(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 93\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader \u001b[38;5;241m=\u001b[39m 
\u001b[43mparsers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTextReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39munnamed_cols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader\u001b[38;5;241m.\u001b[39munnamed_cols\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# error: Cannot determine type of 'names'\u001b[39;00m\n", + "File \u001b[0;32mparsers.pyx:579\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mparsers.pyx:668\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._get_header\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mparsers.pyx:879\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mparsers.pyx:890\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mparsers.pyx:2050\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:1352\u001b[0m, in \u001b[0;36m_fetch_range\u001b[0;34m(client, bucket, key, version_id, start, end, max_attempts, req_kw)\u001b[0m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m e\u001b[38;5;241m.\u001b[39mresponse[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCode\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUnknown\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m 
[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m416\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 1350\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mInvalidRange\u001b[39m\u001b[38;5;124m'\u001b[39m]:\n\u001b[1;32m 1351\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m-> 1352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m 1353\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;241m.\u001b[39mlower(): \u001b[38;5;66;03m# Actual exception type changes often\u001b[39;00m\n", + "\u001b[0;31mPermissionError\u001b[0m: The Access Key Id you provided does not exist in our records." + ] + } + ], + "source": [ + "FILE_PATH_S3 = 'bdc2324-data/1/1campaign_stats.csv'\n", + "\n", + "with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n", + " df_bpe = pd.read_csv(file_in, sep=\";\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0178a1d2-30d4-498b-a147-125f30bf1815", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {