Update
This commit is contained in:
parent
8f9f1e0be0
commit
fdac53024a
|
@ -11,38 +11,54 @@
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 1,
|
||||||
|
"id": "88af2795-8bf9-4df0-a059-be7c28fb4289",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "e05cd2c9-3f76-48e3-b4a6-5055445af2e4",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Configuration de l'accès aux données"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
"id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f",
|
"id": "3ba1f385-2a2f-4b0c-be79-66f618469a9f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"ename": "PermissionError",
|
||||||
"text/plain": [
|
"evalue": "The Access Key Id you provided does not exist in our records.",
|
||||||
"['bdc2324-data/1',\n",
|
"output_type": "error",
|
||||||
" 'bdc2324-data/10',\n",
|
"traceback": [
|
||||||
" 'bdc2324-data/101',\n",
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
" 'bdc2324-data/11',\n",
|
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
|
||||||
" 'bdc2324-data/12',\n",
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:394\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items)\u001b[0m\n\u001b[1;32m 393\u001b[0m dircache \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 394\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m it:\n\u001b[1;32m 395\u001b[0m dircache\u001b[38;5;241m.\u001b[39mextend(i\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCommonPrefixes\u001b[39m\u001b[38;5;124m'\u001b[39m, []))\n",
|
||||||
" 'bdc2324-data/13',\n",
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/paginate.py:269\u001b[0m, in \u001b[0;36mPageIterator.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 269\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcurrent_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 270\u001b[0m parsed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_parsed_response(response)\n",
|
||||||
" 'bdc2324-data/14',\n",
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/paginate.py:357\u001b[0m, in \u001b[0;36mPageIterator._make_request\u001b[0;34m(self, current_kwargs)\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_make_request\u001b[39m(\u001b[38;5;28mself\u001b[39m, current_kwargs):\n\u001b[0;32m--> 357\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcurrent_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
" 'bdc2324-data/2',\n",
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
" 'bdc2324-data/3',\n",
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||||
" 'bdc2324-data/4',\n",
|
"\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the ListObjectsV2 operation: The Access Key Id you provided does not exist in our records.",
|
||||||
" 'bdc2324-data/5',\n",
|
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||||
" 'bdc2324-data/6',\n",
|
"\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
|
||||||
" 'bdc2324-data/7',\n",
|
"Cell \u001b[0;32mIn[2], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m fs \u001b[38;5;241m=\u001b[39m s3fs\u001b[38;5;241m.\u001b[39mS3FileSystem(client_kwargs\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mendpoint_url\u001b[39m\u001b[38;5;124m'\u001b[39m: S3_ENDPOINT_URL})\n\u001b[1;32m 8\u001b[0m BUCKET \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbdc2324-data\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 9\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBUCKET\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
" 'bdc2324-data/8',\n",
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:619\u001b[0m, in \u001b[0;36mS3FileSystem.ls\u001b[0;34m(self, path, detail, refresh, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\" List single \"directory\" with or without details\u001b[39;00m\n\u001b[1;32m 605\u001b[0m \n\u001b[1;32m 606\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;124;03m additional arguments passed on\u001b[39;00m\n\u001b[1;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 618\u001b[0m path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_strip_protocol(path)\u001b[38;5;241m.\u001b[39mrstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 619\u001b[0m files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrefresh\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrefresh\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m files:\n\u001b[1;32m 621\u001b[0m files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ls(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent(path), refresh\u001b[38;5;241m=\u001b[39mrefresh)\n",
|
||||||
" 'bdc2324-data/9']"
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:487\u001b[0m, in \u001b[0;36mS3FileSystem._ls\u001b[0;34m(self, path, refresh)\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsbuckets(refresh)\n\u001b[1;32m 486\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 487\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_lsdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrefresh\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
]
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:409\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items)\u001b[0m\n\u001b[1;32m 407\u001b[0m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 409\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdircache[path] \u001b[38;5;241m=\u001b[39m files\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m files\n",
|
||||||
},
|
"\u001b[0;31mPermissionError\u001b[0m: The Access Key Id you provided does not exist in our records."
|
||||||
"execution_count": 1,
|
]
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import s3fs\n",
|
"import s3fs\n",
|
||||||
"\n",
|
|
||||||
"# Create filesystem object\n",
|
"# Create filesystem object\n",
|
||||||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||||||
|
@ -50,6 +66,109 @@
|
||||||
"BUCKET = \"bdc2324-data\"\n",
|
"BUCKET = \"bdc2324-data\"\n",
|
||||||
"fs.ls(BUCKET)"
|
"fs.ls(BUCKET)"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"id": "d60f6b27-00b4-4655-9325-79169d1e68df",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"bdc2324-data/1\n",
|
||||||
|
"['bdc2324-data/1/1campaign_stats.csv', 'bdc2324-data/1/1campaigns.csv', 'bdc2324-data/1/1categories.csv', 'bdc2324-data/1/1countries.csv', 'bdc2324-data/1/1currencies.csv', 'bdc2324-data/1/1customer_target_mappings.csv', 'bdc2324-data/1/1customersplus.csv', 'bdc2324-data/1/1event_types.csv', 'bdc2324-data/1/1events.csv', 'bdc2324-data/1/1facilities.csv', 'bdc2324-data/1/1link_stats.csv', 'bdc2324-data/1/1pricing_formulas.csv', 'bdc2324-data/1/1product_packs.csv', 'bdc2324-data/1/1products.csv', 'bdc2324-data/1/1products_groups.csv', 'bdc2324-data/1/1purchases.csv', 'bdc2324-data/1/1representation_category_capacities.csv', 'bdc2324-data/1/1representations.csv', 'bdc2324-data/1/1seasons.csv', 'bdc2324-data/1/1structure_tag_mappings.csv', 'bdc2324-data/1/1suppliers.csv', 'bdc2324-data/1/1tags.csv', 'bdc2324-data/1/1target_types.csv', 'bdc2324-data/1/1targets.csv', 'bdc2324-data/1/1tickets.csv', 'bdc2324-data/1/1type_of_categories.csv', 'bdc2324-data/1/1type_of_pricing_formulas.csv', 'bdc2324-data/1/1type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/2\n",
|
||||||
|
"['bdc2324-data/2/2campaign_stats.csv', 'bdc2324-data/2/2campaigns.csv', 'bdc2324-data/2/2categories.csv', 'bdc2324-data/2/2contribution_sites.csv', 'bdc2324-data/2/2contributions.csv', 'bdc2324-data/2/2countries.csv', 'bdc2324-data/2/2currencies.csv', 'bdc2324-data/2/2customer_target_mappings.csv', 'bdc2324-data/2/2customersplus.csv', 'bdc2324-data/2/2event_types.csv', 'bdc2324-data/2/2events.csv', 'bdc2324-data/2/2facilities.csv', 'bdc2324-data/2/2link_stats.csv', 'bdc2324-data/2/2pricing_formulas.csv', 'bdc2324-data/2/2product_packs.csv', 'bdc2324-data/2/2products.csv', 'bdc2324-data/2/2products_groups.csv', 'bdc2324-data/2/2purchases.csv', 'bdc2324-data/2/2representation_category_capacities.csv', 'bdc2324-data/2/2representations.csv', 'bdc2324-data/2/2seasons.csv', 'bdc2324-data/2/2structure_tag_mappings.csv', 'bdc2324-data/2/2suppliers.csv', 'bdc2324-data/2/2tags.csv', 'bdc2324-data/2/2target_types.csv', 'bdc2324-data/2/2targets.csv', 'bdc2324-data/2/2tickets.csv']\n",
|
||||||
|
"bdc2324-data/3\n",
|
||||||
|
"['bdc2324-data/3/3campaign_stats.csv', 'bdc2324-data/3/3campaigns.csv', 'bdc2324-data/3/3categories.csv', 'bdc2324-data/3/3consumptions.csv', 'bdc2324-data/3/3contribution_sites.csv', 'bdc2324-data/3/3contributions.csv', 'bdc2324-data/3/3countries.csv', 'bdc2324-data/3/3currencies.csv', 'bdc2324-data/3/3customer_target_mappings.csv', 'bdc2324-data/3/3customersplus.csv', 'bdc2324-data/3/3event_types.csv', 'bdc2324-data/3/3events.csv', 'bdc2324-data/3/3facilities.csv', 'bdc2324-data/3/3link_stats.csv', 'bdc2324-data/3/3pricing_formulas.csv', 'bdc2324-data/3/3product_packs.csv', 'bdc2324-data/3/3products.csv', 'bdc2324-data/3/3products_groups.csv', 'bdc2324-data/3/3purchases.csv', 'bdc2324-data/3/3representation_category_capacities.csv', 'bdc2324-data/3/3representations.csv', 'bdc2324-data/3/3seasons.csv', 'bdc2324-data/3/3structure_tag_mappings.csv', 'bdc2324-data/3/3suppliers.csv', 'bdc2324-data/3/3tags.csv', 'bdc2324-data/3/3target_types.csv', 'bdc2324-data/3/3targets.csv', 'bdc2324-data/3/3tickets.csv']\n",
|
||||||
|
"bdc2324-data/4\n",
|
||||||
|
"['bdc2324-data/4/4campaign_stats.csv', 'bdc2324-data/4/4campaigns.csv', 'bdc2324-data/4/4categories.csv', 'bdc2324-data/4/4contribution_sites.csv', 'bdc2324-data/4/4contributions.csv', 'bdc2324-data/4/4countries.csv', 'bdc2324-data/4/4currencies.csv', 'bdc2324-data/4/4customer_target_mappings.csv', 'bdc2324-data/4/4customersplus.csv', 'bdc2324-data/4/4event_types.csv', 'bdc2324-data/4/4events.csv', 'bdc2324-data/4/4facilities.csv', 'bdc2324-data/4/4link_stats.csv', 'bdc2324-data/4/4pricing_formulas.csv', 'bdc2324-data/4/4product_packs.csv', 'bdc2324-data/4/4products.csv', 'bdc2324-data/4/4products_groups.csv', 'bdc2324-data/4/4purchases.csv', 'bdc2324-data/4/4representation_category_capacities.csv', 'bdc2324-data/4/4representations.csv', 'bdc2324-data/4/4seasons.csv', 'bdc2324-data/4/4structure_tag_mappings.csv', 'bdc2324-data/4/4suppliers.csv', 'bdc2324-data/4/4tags.csv', 'bdc2324-data/4/4target_types.csv', 'bdc2324-data/4/4targets.csv', 'bdc2324-data/4/4tickets.csv', 'bdc2324-data/4/4type_of_pricing_formulas.csv', 'bdc2324-data/4/4type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/5\n",
|
||||||
|
"['bdc2324-data/5/5campaign_stats.csv', 'bdc2324-data/5/5campaigns.csv', 'bdc2324-data/5/5categories.csv', 'bdc2324-data/5/5consumptions.csv', 'bdc2324-data/5/5countries.csv', 'bdc2324-data/5/5currencies.csv', 'bdc2324-data/5/5customer_target_mappings.csv', 'bdc2324-data/5/5customersplus.csv', 'bdc2324-data/5/5event_types.csv', 'bdc2324-data/5/5events.csv', 'bdc2324-data/5/5facilities.csv', 'bdc2324-data/5/5link_stats.csv', 'bdc2324-data/5/5pricing_formulas.csv', 'bdc2324-data/5/5product_packs.csv', 'bdc2324-data/5/5products.csv', 'bdc2324-data/5/5products_groups.csv', 'bdc2324-data/5/5purchases.csv', 'bdc2324-data/5/5representation_category_capacities.csv', 'bdc2324-data/5/5representations.csv', 'bdc2324-data/5/5seasons.csv', 'bdc2324-data/5/5suppliers.csv', 'bdc2324-data/5/5target_types.csv', 'bdc2324-data/5/5targets.csv', 'bdc2324-data/5/5tickets.csv']\n",
|
||||||
|
"bdc2324-data/6\n",
|
||||||
|
"['bdc2324-data/6/6campaign_stats.csv', 'bdc2324-data/6/6campaigns.csv', 'bdc2324-data/6/6categories.csv', 'bdc2324-data/6/6consumptions.csv', 'bdc2324-data/6/6countries.csv', 'bdc2324-data/6/6currencies.csv', 'bdc2324-data/6/6customer_target_mappings.csv', 'bdc2324-data/6/6customersplus.csv', 'bdc2324-data/6/6event_types.csv', 'bdc2324-data/6/6events.csv', 'bdc2324-data/6/6facilities.csv', 'bdc2324-data/6/6link_stats.csv', 'bdc2324-data/6/6pricing_formulas.csv', 'bdc2324-data/6/6product_packs.csv', 'bdc2324-data/6/6products.csv', 'bdc2324-data/6/6products_groups.csv', 'bdc2324-data/6/6purchases.csv', 'bdc2324-data/6/6representation_category_capacities.csv', 'bdc2324-data/6/6representations.csv', 'bdc2324-data/6/6seasons.csv', 'bdc2324-data/6/6structure_tag_mappings.csv', 'bdc2324-data/6/6suppliers.csv', 'bdc2324-data/6/6tags.csv', 'bdc2324-data/6/6target_types.csv', 'bdc2324-data/6/6targets.csv', 'bdc2324-data/6/6tickets.csv', 'bdc2324-data/6/6type_of_pricing_formulas.csv', 'bdc2324-data/6/6type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/7\n",
|
||||||
|
"['bdc2324-data/7/7campaign_stats.csv', 'bdc2324-data/7/7campaigns.csv', 'bdc2324-data/7/7categories.csv', 'bdc2324-data/7/7consumptions.csv', 'bdc2324-data/7/7countries.csv', 'bdc2324-data/7/7currencies.csv', 'bdc2324-data/7/7customer_target_mappings.csv', 'bdc2324-data/7/7customersplus.csv', 'bdc2324-data/7/7event_types.csv', 'bdc2324-data/7/7events.csv', 'bdc2324-data/7/7facilities.csv', 'bdc2324-data/7/7link_stats.csv', 'bdc2324-data/7/7pricing_formulas.csv', 'bdc2324-data/7/7product_packs.csv', 'bdc2324-data/7/7products.csv', 'bdc2324-data/7/7products_groups.csv', 'bdc2324-data/7/7purchases.csv', 'bdc2324-data/7/7representation_category_capacities.csv', 'bdc2324-data/7/7representation_types.csv', 'bdc2324-data/7/7representations.csv', 'bdc2324-data/7/7seasons.csv', 'bdc2324-data/7/7structure_tag_mappings.csv', 'bdc2324-data/7/7suppliers.csv', 'bdc2324-data/7/7tags.csv', 'bdc2324-data/7/7target_types.csv', 'bdc2324-data/7/7targets.csv', 'bdc2324-data/7/7tickets.csv', 'bdc2324-data/7/7type_of_categories.csv', 'bdc2324-data/7/7type_of_pricing_formulas.csv', 'bdc2324-data/7/7type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/8\n",
|
||||||
|
"['bdc2324-data/8/8campaign_stats.csv', 'bdc2324-data/8/8campaigns.csv', 'bdc2324-data/8/8categories.csv', 'bdc2324-data/8/8countries.csv', 'bdc2324-data/8/8currencies.csv', 'bdc2324-data/8/8customer_target_mappings.csv', 'bdc2324-data/8/8customersplus.csv', 'bdc2324-data/8/8event_types.csv', 'bdc2324-data/8/8events.csv', 'bdc2324-data/8/8facilities.csv', 'bdc2324-data/8/8link_stats.csv', 'bdc2324-data/8/8pricing_formulas.csv', 'bdc2324-data/8/8product_packs.csv', 'bdc2324-data/8/8products.csv', 'bdc2324-data/8/8products_groups.csv', 'bdc2324-data/8/8purchases.csv', 'bdc2324-data/8/8representation_category_capacities.csv', 'bdc2324-data/8/8representations.csv', 'bdc2324-data/8/8seasons.csv', 'bdc2324-data/8/8suppliers.csv', 'bdc2324-data/8/8target_types.csv', 'bdc2324-data/8/8targets.csv', 'bdc2324-data/8/8tickets.csv', 'bdc2324-data/8/8type_of_categories.csv', 'bdc2324-data/8/8type_of_pricing_formulas.csv', 'bdc2324-data/8/8type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/9\n",
|
||||||
|
"['bdc2324-data/9/9campaign_stats.csv', 'bdc2324-data/9/9campaigns.csv', 'bdc2324-data/9/9categories.csv', 'bdc2324-data/9/9countries.csv', 'bdc2324-data/9/9currencies.csv', 'bdc2324-data/9/9customer_target_mappings.csv', 'bdc2324-data/9/9customersplus.csv', 'bdc2324-data/9/9event_types.csv', 'bdc2324-data/9/9events.csv', 'bdc2324-data/9/9facilities.csv', 'bdc2324-data/9/9link_stats.csv', 'bdc2324-data/9/9pricing_formulas.csv', 'bdc2324-data/9/9product_packs.csv', 'bdc2324-data/9/9products.csv', 'bdc2324-data/9/9products_groups.csv', 'bdc2324-data/9/9purchases.csv', 'bdc2324-data/9/9representation_category_capacities.csv', 'bdc2324-data/9/9representations.csv', 'bdc2324-data/9/9seasons.csv', 'bdc2324-data/9/9suppliers.csv', 'bdc2324-data/9/9target_types.csv', 'bdc2324-data/9/9targets.csv', 'bdc2324-data/9/9tickets.csv']\n",
|
||||||
|
"bdc2324-data/10\n",
|
||||||
|
"['bdc2324-data/10/10campaign_stats.csv', 'bdc2324-data/10/10campaigns.csv', 'bdc2324-data/10/10categories.csv', 'bdc2324-data/10/10countries.csv', 'bdc2324-data/10/10currencies.csv', 'bdc2324-data/10/10customer_target_mappings.csv', 'bdc2324-data/10/10customersplus.csv', 'bdc2324-data/10/10event_types.csv', 'bdc2324-data/10/10events.csv', 'bdc2324-data/10/10facilities.csv', 'bdc2324-data/10/10link_stats.csv', 'bdc2324-data/10/10pricing_formulas.csv', 'bdc2324-data/10/10product_packs.csv', 'bdc2324-data/10/10products.csv', 'bdc2324-data/10/10products_groups.csv', 'bdc2324-data/10/10purchases.csv', 'bdc2324-data/10/10representation_category_capacities.csv', 'bdc2324-data/10/10representation_types.csv', 'bdc2324-data/10/10representations.csv', 'bdc2324-data/10/10seasons.csv', 'bdc2324-data/10/10suppliers.csv', 'bdc2324-data/10/10tags.csv', 'bdc2324-data/10/10target_types.csv', 'bdc2324-data/10/10targets.csv', 'bdc2324-data/10/10tickets.csv', 'bdc2324-data/10/10type_of_pricing_formulas.csv', 'bdc2324-data/10/10type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/11\n",
|
||||||
|
"['bdc2324-data/11/11campaign_stats.csv', 'bdc2324-data/11/11campaigns.csv', 'bdc2324-data/11/11categories.csv', 'bdc2324-data/11/11countries.csv', 'bdc2324-data/11/11currencies.csv', 'bdc2324-data/11/11customer_target_mappings.csv', 'bdc2324-data/11/11customersplus.csv', 'bdc2324-data/11/11event_types.csv', 'bdc2324-data/11/11events.csv', 'bdc2324-data/11/11facilities.csv', 'bdc2324-data/11/11link_stats.csv', 'bdc2324-data/11/11pricing_formulas.csv', 'bdc2324-data/11/11product_packs.csv', 'bdc2324-data/11/11products.csv', 'bdc2324-data/11/11products_groups.csv', 'bdc2324-data/11/11purchases.csv', 'bdc2324-data/11/11representation_category_capacities.csv', 'bdc2324-data/11/11representations.csv', 'bdc2324-data/11/11seasons.csv', 'bdc2324-data/11/11structure_tag_mappings.csv', 'bdc2324-data/11/11suppliers.csv', 'bdc2324-data/11/11tags.csv', 'bdc2324-data/11/11target_types.csv', 'bdc2324-data/11/11targets.csv', 'bdc2324-data/11/11tickets.csv']\n",
|
||||||
|
"bdc2324-data/12\n",
|
||||||
|
"['bdc2324-data/12/12campaign_stats.csv', 'bdc2324-data/12/12campaigns.csv', 'bdc2324-data/12/12categories.csv', 'bdc2324-data/12/12consumptions.csv', 'bdc2324-data/12/12countries.csv', 'bdc2324-data/12/12currencies.csv', 'bdc2324-data/12/12customer_target_mappings.csv', 'bdc2324-data/12/12customersplus.csv', 'bdc2324-data/12/12event_types.csv', 'bdc2324-data/12/12events.csv', 'bdc2324-data/12/12facilities.csv', 'bdc2324-data/12/12link_stats.csv', 'bdc2324-data/12/12pricing_formulas.csv', 'bdc2324-data/12/12product_packs.csv', 'bdc2324-data/12/12products.csv', 'bdc2324-data/12/12products_groups.csv', 'bdc2324-data/12/12purchases.csv', 'bdc2324-data/12/12representation_category_capacities.csv', 'bdc2324-data/12/12representations.csv', 'bdc2324-data/12/12seasons.csv', 'bdc2324-data/12/12suppliers.csv', 'bdc2324-data/12/12target_types.csv', 'bdc2324-data/12/12targets.csv', 'bdc2324-data/12/12tickets.csv', 'bdc2324-data/12/12type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/13\n",
|
||||||
|
"['bdc2324-data/13/13campaign_stats.csv', 'bdc2324-data/13/13campaigns.csv', 'bdc2324-data/13/13categories.csv', 'bdc2324-data/13/13countries.csv', 'bdc2324-data/13/13currencies.csv', 'bdc2324-data/13/13customer_target_mappings.csv', 'bdc2324-data/13/13customersplus.csv', 'bdc2324-data/13/13event_types.csv', 'bdc2324-data/13/13events.csv', 'bdc2324-data/13/13facilities.csv', 'bdc2324-data/13/13link_stats.csv', 'bdc2324-data/13/13pricing_formulas.csv', 'bdc2324-data/13/13product_packs.csv', 'bdc2324-data/13/13products.csv', 'bdc2324-data/13/13products_groups.csv', 'bdc2324-data/13/13purchases.csv', 'bdc2324-data/13/13representation_category_capacities.csv', 'bdc2324-data/13/13representation_types.csv', 'bdc2324-data/13/13representations.csv', 'bdc2324-data/13/13seasons.csv', 'bdc2324-data/13/13structure_tag_mappings.csv', 'bdc2324-data/13/13suppliers.csv', 'bdc2324-data/13/13tags.csv', 'bdc2324-data/13/13target_types.csv', 'bdc2324-data/13/13targets.csv', 'bdc2324-data/13/13tickets.csv']\n",
|
||||||
|
"bdc2324-data/14\n",
|
||||||
|
"['bdc2324-data/14/14campaign_stats.csv', 'bdc2324-data/14/14campaigns.csv', 'bdc2324-data/14/14categories.csv', 'bdc2324-data/14/14countries.csv', 'bdc2324-data/14/14currencies.csv', 'bdc2324-data/14/14customer_target_mappings.csv', 'bdc2324-data/14/14customersplus.csv', 'bdc2324-data/14/14event_types.csv', 'bdc2324-data/14/14events.csv', 'bdc2324-data/14/14facilities.csv', 'bdc2324-data/14/14link_stats.csv', 'bdc2324-data/14/14pricing_formulas.csv', 'bdc2324-data/14/14product_packs.csv', 'bdc2324-data/14/14products.csv', 'bdc2324-data/14/14products_groups.csv', 'bdc2324-data/14/14purchases.csv', 'bdc2324-data/14/14representation_category_capacities.csv', 'bdc2324-data/14/14representation_types.csv', 'bdc2324-data/14/14representations.csv', 'bdc2324-data/14/14seasons.csv', 'bdc2324-data/14/14suppliers.csv', 'bdc2324-data/14/14target_types.csv', 'bdc2324-data/14/14targets.csv', 'bdc2324-data/14/14tickets.csv', 'bdc2324-data/14/14type_of_categories.csv', 'bdc2324-data/14/14type_of_pricing_formulas.csv', 'bdc2324-data/14/14type_ofs.csv']\n",
|
||||||
|
"bdc2324-data/101\n",
|
||||||
|
"['bdc2324-data/101/101campaign_stats.csv', 'bdc2324-data/101/101campaigns.csv', 'bdc2324-data/101/101categories.csv', 'bdc2324-data/101/101contribution_sites.csv', 'bdc2324-data/101/101contributions.csv', 'bdc2324-data/101/101countries.csv', 'bdc2324-data/101/101currencies.csv', 'bdc2324-data/101/101customer_target_mappings.csv', 'bdc2324-data/101/101customersplus.csv', 'bdc2324-data/101/101event_types.csv', 'bdc2324-data/101/101events.csv', 'bdc2324-data/101/101facilities.csv', 'bdc2324-data/101/101link_stats.csv', 'bdc2324-data/101/101pricing_formulas.csv', 'bdc2324-data/101/101product_packs.csv', 'bdc2324-data/101/101products.csv', 'bdc2324-data/101/101products_groups.csv', 'bdc2324-data/101/101purchases.csv', 'bdc2324-data/101/101representation_category_capacities.csv', 'bdc2324-data/101/101representations.csv', 'bdc2324-data/101/101seasons.csv', 'bdc2324-data/101/101structure_tag_mappings.csv', 'bdc2324-data/101/101suppliers.csv', 'bdc2324-data/101/101tags.csv', 'bdc2324-data/101/101target_types.csv', 'bdc2324-data/101/101targets.csv', 'bdc2324-data/101/101tickets.csv', 'bdc2324-data/101/101tickets_1.csv', 'bdc2324-data/101/101type_of_pricing_formulas.csv', 'bdc2324-data/101/101type_ofs.csv']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Liste des jeu de données par dossier\n",
|
||||||
|
"for i in range(1, 15):\n",
|
||||||
|
" FILE_PATH_S3 = BUCKET + \"/\" + str(i)\n",
|
||||||
|
" print(FILE_PATH_S3)\n",
|
||||||
|
" print(fs.ls(FILE_PATH_S3))\n",
|
||||||
|
"print(BUCKET + \"/101\")\n",
|
||||||
|
"print(fs.ls(BUCKET + \"/101\"))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 21,
|
||||||
|
"id": "ba9d04ad-6cc1-4bac-b1a0-44bedfb09763",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "PermissionError",
|
||||||
|
"evalue": "The Access Key Id you provided does not exist in our records.",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:1333\u001b[0m, in \u001b[0;36m_fetch_range\u001b[0;34m(client, bucket, key, version_id, start, end, max_attempts, req_kw)\u001b[0m\n\u001b[1;32m 1332\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1333\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1334\u001b[0m \u001b[43m \u001b[49m\u001b[43mRange\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbytes=\u001b[39;49m\u001b[38;5;132;43;01m%i\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m%i\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m%\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1335\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mversion_id_kw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1336\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mreq_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mBody\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mread()\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||||
|
"\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the GetObject operation: The Access Key Id you provided does not exist in our records.",
|
||||||
|
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||||
|
"\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[21], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m FILE_PATH_S3 \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbdc2324-data/1/1campaign_stats.csv\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mopen(FILE_PATH_S3, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file_in:\n\u001b[0;32m----> 4\u001b[0m df_bpe \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_in\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m;\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:948\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 935\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 936\u001b[0m dialect,\n\u001b[1;32m 937\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 944\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 945\u001b[0m )\n\u001b[1;32m 946\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 948\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:611\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 608\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 610\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 611\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 613\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 614\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1448\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1445\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1447\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1448\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1723\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1720\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 1722\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1723\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1724\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 1725\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:93\u001b[0m, in \u001b[0;36mCParserWrapper.__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype_backend\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 91\u001b[0m \u001b[38;5;66;03m# Fail here loudly instead of in cython after reading\u001b[39;00m\n\u001b[1;32m 92\u001b[0m import_optional_dependency(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 93\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader \u001b[38;5;241m=\u001b[39m \u001b[43mparsers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTextReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39munnamed_cols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader\u001b[38;5;241m.\u001b[39munnamed_cols\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# error: Cannot determine type of 'names'\u001b[39;00m\n",
|
||||||
|
"File \u001b[0;32mparsers.pyx:579\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[0;32mparsers.pyx:668\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._get_header\u001b[0;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[0;32mparsers.pyx:879\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[0;32mparsers.pyx:890\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[0;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[0;32mparsers.pyx:2050\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n",
|
||||||
|
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:1352\u001b[0m, in \u001b[0;36m_fetch_range\u001b[0;34m(client, bucket, key, version_id, start, end, max_attempts, req_kw)\u001b[0m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m e\u001b[38;5;241m.\u001b[39mresponse[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCode\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUnknown\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m416\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 1350\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mInvalidRange\u001b[39m\u001b[38;5;124m'\u001b[39m]:\n\u001b[1;32m 1351\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m-> 1352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m 1353\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;241m.\u001b[39mlower(): \u001b[38;5;66;03m# Actual exception type changes often\u001b[39;00m\n",
|
||||||
|
"\u001b[0;31mPermissionError\u001b[0m: The Access Key Id you provided does not exist in our records."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"FILE_PATH_S3 = 'bdc2324-data/1/1campaign_stats.csv'\n",
|
||||||
|
"\n",
|
||||||
|
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||||
|
" df_bpe = pd.read_csv(file_in, sep=\";\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0178a1d2-30d4-498b-a147-125f30bf1815",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user