Comparaison des id
This commit is contained in:
parent
fdac53024a
commit
2e1054f4f9
|
@ -34,26 +34,28 @@
|
|||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "PermissionError",
|
||||
"evalue": "The Access Key Id you provided does not exist in our records.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:394\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items)\u001b[0m\n\u001b[1;32m 393\u001b[0m dircache \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 394\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m it:\n\u001b[1;32m 395\u001b[0m dircache\u001b[38;5;241m.\u001b[39mextend(i\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCommonPrefixes\u001b[39m\u001b[38;5;124m'\u001b[39m, []))\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/paginate.py:269\u001b[0m, in \u001b[0;36mPageIterator.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 269\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcurrent_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 270\u001b[0m parsed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_extract_parsed_response(response)\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/paginate.py:357\u001b[0m, in \u001b[0;36mPageIterator._make_request\u001b[0;34m(self, current_kwargs)\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_make_request\u001b[39m(\u001b[38;5;28mself\u001b[39m, current_kwargs):\n\u001b[0;32m--> 357\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcurrent_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||
"\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the ListObjectsV2 operation: The Access Key Id you provided does not exist in our records.",
|
||||
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||
"\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[2], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m fs \u001b[38;5;241m=\u001b[39m s3fs\u001b[38;5;241m.\u001b[39mS3FileSystem(client_kwargs\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mendpoint_url\u001b[39m\u001b[38;5;124m'\u001b[39m: S3_ENDPOINT_URL})\n\u001b[1;32m 8\u001b[0m BUCKET \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbdc2324-data\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 9\u001b[0m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBUCKET\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:619\u001b[0m, in \u001b[0;36mS3FileSystem.ls\u001b[0;34m(self, path, detail, refresh, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\" List single \"directory\" with or without details\u001b[39;00m\n\u001b[1;32m 605\u001b[0m \n\u001b[1;32m 606\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;124;03m additional arguments passed on\u001b[39;00m\n\u001b[1;32m 617\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 618\u001b[0m path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_strip_protocol(path)\u001b[38;5;241m.\u001b[39mrstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 619\u001b[0m files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrefresh\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrefresh\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m files:\n\u001b[1;32m 621\u001b[0m files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ls(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_parent(path), refresh\u001b[38;5;241m=\u001b[39mrefresh)\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:487\u001b[0m, in \u001b[0;36mS3FileSystem._ls\u001b[0;34m(self, path, refresh)\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lsbuckets(refresh)\n\u001b[1;32m 486\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 487\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_lsdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrefresh\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:409\u001b[0m, in \u001b[0;36mS3FileSystem._lsdir\u001b[0;34m(self, path, refresh, max_items)\u001b[0m\n\u001b[1;32m 407\u001b[0m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ClientError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 409\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdircache[path] \u001b[38;5;241m=\u001b[39m files\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m files\n",
|
||||
"\u001b[0;31mPermissionError\u001b[0m: The Access Key Id you provided does not exist in our records."
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['bdc2324-data/1',\n",
|
||||
" 'bdc2324-data/10',\n",
|
||||
" 'bdc2324-data/101',\n",
|
||||
" 'bdc2324-data/11',\n",
|
||||
" 'bdc2324-data/12',\n",
|
||||
" 'bdc2324-data/13',\n",
|
||||
" 'bdc2324-data/14',\n",
|
||||
" 'bdc2324-data/2',\n",
|
||||
" 'bdc2324-data/3',\n",
|
||||
" 'bdc2324-data/4',\n",
|
||||
" 'bdc2324-data/5',\n",
|
||||
" 'bdc2324-data/6',\n",
|
||||
" 'bdc2324-data/7',\n",
|
||||
" 'bdc2324-data/8',\n",
|
||||
" 'bdc2324-data/9']"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
@ -69,7 +71,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 3,
|
||||
"id": "d60f6b27-00b4-4655-9325-79169d1e68df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -122,53 +124,329 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 32,
|
||||
"id": "ba9d04ad-6cc1-4bac-b1a0-44bedfb09763",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "PermissionError",
|
||||
"evalue": "The Access Key Id you provided does not exist in our records.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:1333\u001b[0m, in \u001b[0;36m_fetch_range\u001b[0;34m(client, bucket, key, version_id, start, end, max_attempts, req_kw)\u001b[0m\n\u001b[1;32m 1332\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1333\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1334\u001b[0m \u001b[43m \u001b[49m\u001b[43mRange\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mbytes=\u001b[39;49m\u001b[38;5;132;43;01m%i\u001b[39;49;00m\u001b[38;5;124;43m-\u001b[39;49m\u001b[38;5;132;43;01m%i\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m%\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1335\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mversion_id_kw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1336\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mreq_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mBody\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mread()\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
||||
"\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the GetObject operation: The Access Key Id you provided does not exist in our records.",
|
||||
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||
"\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[21], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m FILE_PATH_S3 \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbdc2324-data/1/1campaign_stats.csv\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mopen(FILE_PATH_S3, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file_in:\n\u001b[0;32m----> 4\u001b[0m df_bpe \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_in\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m;\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:948\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 935\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m 936\u001b[0m dialect,\n\u001b[1;32m 937\u001b[0m delimiter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 944\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 945\u001b[0m )\n\u001b[1;32m 946\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 948\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:611\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 608\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 610\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 611\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 613\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m 614\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1448\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1445\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1447\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1448\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1723\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1720\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m 1722\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1723\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmapping\u001b[49m\u001b[43m[\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1724\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 1725\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:93\u001b[0m, in \u001b[0;36mCParserWrapper.__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype_backend\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 91\u001b[0m \u001b[38;5;66;03m# Fail here loudly instead of in cython after reading\u001b[39;00m\n\u001b[1;32m 92\u001b[0m import_optional_dependency(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyarrow\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 93\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader \u001b[38;5;241m=\u001b[39m \u001b[43mparsers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTextReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39munnamed_cols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reader\u001b[38;5;241m.\u001b[39munnamed_cols\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# error: Cannot determine type of 'names'\u001b[39;00m\n",
|
||||
"File \u001b[0;32mparsers.pyx:579\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mparsers.pyx:668\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._get_header\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mparsers.pyx:879\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mparsers.pyx:890\u001b[0m, in \u001b[0;36mpandas._libs.parsers.TextReader._check_tokenize_status\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mparsers.pyx:2050\u001b[0m, in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/s3fs/core.py:1352\u001b[0m, in \u001b[0;36m_fetch_range\u001b[0;34m(client, bucket, key, version_id, start, end, max_attempts, req_kw)\u001b[0m\n\u001b[1;32m 1349\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m e\u001b[38;5;241m.\u001b[39mresponse[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mError\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCode\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUnknown\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m416\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 1350\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mInvalidRange\u001b[39m\u001b[38;5;124m'\u001b[39m]:\n\u001b[1;32m 1351\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m-> 1352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m translate_boto_error(e)\n\u001b[1;32m 1353\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtime\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;241m.\u001b[39mlower(): \u001b[38;5;66;03m# Actual exception type changes often\u001b[39;00m\n",
|
||||
"\u001b[0;31mPermissionError\u001b[0m: The Access Key Id you provided does not exist in our records."
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Chargement des fichiers campaign_stats.csv\n",
|
||||
"FILE_PATH_S3 = 'bdc2324-data/1/1campaign_stats.csv'\n",
|
||||
"\n",
|
||||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||
" df_bpe = pd.read_csv(file_in, sep=\";\")"
|
||||
" campaign_stats_1 = pd.read_csv(file_in, sep=\",\")\n",
|
||||
"\n",
|
||||
"FILE_PATH_S3 = 'bdc2324-data/2/2campaign_stats.csv'\n",
|
||||
"\n",
|
||||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||
" campaign_stats_2 = pd.read_csv(file_in, sep=\",\")\n",
|
||||
"\n",
|
||||
"FILE_PATH_S3 = 'bdc2324-data/3/3campaign_stats.csv'\n",
|
||||
"\n",
|
||||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||
" campaign_stats_3 = pd.read_csv(file_in, sep=\",\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0178a1d2-30d4-498b-a147-125f30bf1815",
|
||||
"execution_count": 34,
|
||||
"id": "cacaecc1-4d8a-4e20-8cd3-b452cf17db56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# Conversion des dates 'sent_at'\n",
|
||||
"campaign_stats_1['sent_at'] = pd.to_datetime(campaign_stats_1['sent_at'], format = 'ISO8601', utc = True)\n",
|
||||
"campaign_stats_2['sent_at'] = pd.to_datetime(campaign_stats_2['sent_at'], format = 'ISO8601', utc = True)\n",
|
||||
"campaign_stats_3['sent_at'] = pd.to_datetime(campaign_stats_3['sent_at'], format = 'ISO8601', utc = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "2ec4b583-dc64-43e9-b3ae-6bbaee0bc135",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-11-09 18:10:45+00:00\n",
|
||||
"2020-06-02 08:24:08+00:00\n",
|
||||
"2023-10-12 01:39:48+00:00\n",
|
||||
"2023-10-10 17:06:29+00:00\n",
|
||||
"2023-11-01 09:20:48+00:00\n",
|
||||
"2021-03-31 14:59:02+00:00\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Chaque unites correspond à une période ? --> Non, les dossiers ont juste pour but de réduire la taille des fichiers\n",
|
||||
"print(campaign_stats_1['sent_at'].max())\n",
|
||||
"print(campaign_stats_1['sent_at'].min())\n",
|
||||
"\n",
|
||||
"print(campaign_stats_2['sent_at'].max())\n",
|
||||
"print(campaign_stats_2['sent_at'].min())\n",
|
||||
"\n",
|
||||
"print(campaign_stats_3['sent_at'].max())\n",
|
||||
"print(campaign_stats_3['sent_at'].min())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "77894273-b3e5-4f29-bd63-9f4df8082b9b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0 2021-03-28 18:01:09+02:00\n",
|
||||
"1 2021-03-28 18:01:09+02:00\n",
|
||||
"2 2021-03-28 18:00:59+02:00\n",
|
||||
"3 2021-03-28 18:00:59+02:00\n",
|
||||
"4 2021-03-28 18:01:06+02:00\n",
|
||||
" ... \n",
|
||||
"6214803 2023-10-23 11:32:33+02:00\n",
|
||||
"6214804 2023-10-23 11:32:49+02:00\n",
|
||||
"6214805 2023-10-23 11:33:28+02:00\n",
|
||||
"6214806 2023-10-23 11:31:53+02:00\n",
|
||||
"6214807 2023-10-23 11:33:54+02:00\n",
|
||||
"Name: sent_at, Length: 6214808, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"campaign_stats_1['sent_at']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "31f2edbf-5661-4516-9835-06d4da615c13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customersplus.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "4223c873-cbd3-46d1-ac96-c9a3b9e97092",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_426/4118060109.py:9: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||||
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"FILE_PATH_S3 = 'bdc2324-data/1/1customersplus.csv'\n",
|
||||
"\n",
|
||||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||
" customers_plus_1 = pd.read_csv(file_in, sep=\",\")\n",
|
||||
"\n",
|
||||
"FILE_PATH_S3 = 'bdc2324-data/2/2customersplus.csv'\n",
|
||||
"\n",
|
||||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||||
" customers_plus_2 = pd.read_csv(file_in, sep=\",\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "460f853a-68c0-42a7-9877-b83d3aaec813",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['id', 'lastname', 'firstname', 'birthdate', 'email', 'street_id',\n",
|
||||
" 'created_at', 'updated_at', 'civility', 'is_partner', 'extra',\n",
|
||||
" 'deleted_at', 'reference', 'gender', 'is_email_true', 'extra_field',\n",
|
||||
" 'identifier', 'opt_in', 'structure_id', 'note', 'profession',\n",
|
||||
" 'language', 'mcp_contact_id', 'need_reload', 'last_buying_date',\n",
|
||||
" 'max_price', 'ticket_sum', 'average_price', 'fidelity',\n",
|
||||
" 'average_purchase_delay', 'average_price_basket',\n",
|
||||
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
|
||||
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
|
||||
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
|
||||
" 'tenant_id'],\n",
|
||||
" dtype='object')"
|
||||
]
|
||||
},
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"customers_plus.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "d5a9398f-72fc-4548-9f53-b20b372144b2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(151866, 43)"
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"customers_plus.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"id": "7467ddbe-0bd4-44cc-8a16-84aa41853638",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"151866"
|
||||
]
|
||||
},
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"customers_plus_1['id'].nunique()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"id": "e15f05f8-3a89-4fc3-84a9-dae70e168440",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"275622"
|
||||
]
|
||||
},
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"customers_plus_2['id'].nunique()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "b40a653e-013f-48d0-8b57-0284587b36c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"id": "32fa2215-3c79-40b5-8643-755865959fc7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1"
|
||||
]
|
||||
},
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"common_id = set(customers_plus_2['id']).intersection(customers_plus_1['id'])\n",
|
||||
"# Exemple id commun = caractéristiques communes\n",
|
||||
"print(customers_plus_2[customers_plus_2['id'] == list(common_id)[0]])\n",
|
||||
"\n",
|
||||
"print(customers_plus_1[customers_plus_1['id'] == list(common_id)[0]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "0eb345e4-69f5-4e16-ac57-e33674c6c43d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"id 0.000000\n",
|
||||
"lastname 43.461341\n",
|
||||
"firstname 44.995588\n",
|
||||
"birthdate 96.419870\n",
|
||||
"email 8.622075\n",
|
||||
"street_id 0.000000\n",
|
||||
"created_at 0.000000\n",
|
||||
"updated_at 0.000000\n",
|
||||
"civility 100.000000\n",
|
||||
"is_partner 0.000000\n",
|
||||
"extra 100.000000\n",
|
||||
"deleted_at 100.000000\n",
|
||||
"reference 100.000000\n",
|
||||
"gender 0.000000\n",
|
||||
"is_email_true 0.000000\n",
|
||||
"extra_field 100.000000\n",
|
||||
"identifier 0.000000\n",
|
||||
"opt_in 0.000000\n",
|
||||
"structure_id 88.072380\n",
|
||||
"note 99.403421\n",
|
||||
"profession 95.913503\n",
|
||||
"language 99.280945\n",
|
||||
"mcp_contact_id 34.876141\n",
|
||||
"need_reload 0.000000\n",
|
||||
"last_buying_date 51.653431\n",
|
||||
"max_price 51.653431\n",
|
||||
"ticket_sum 0.000000\n",
|
||||
"average_price 8.639195\n",
|
||||
"fidelity 0.000000\n",
|
||||
"average_purchase_delay 51.653431\n",
|
||||
"average_price_basket 51.653431\n",
|
||||
"average_ticket_basket 51.653431\n",
|
||||
"total_price 43.014236\n",
|
||||
"preferred_category 100.000000\n",
|
||||
"preferred_supplier 100.000000\n",
|
||||
"preferred_formula 100.000000\n",
|
||||
"purchase_count 0.000000\n",
|
||||
"first_buying_date 51.653431\n",
|
||||
"last_visiting_date 100.000000\n",
|
||||
"zipcode 71.176564\n",
|
||||
"country 5.459418\n",
|
||||
"age 96.419870\n",
|
||||
"tenant_id 0.000000\n",
|
||||
"dtype: float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(customers_plus.isna().mean()*100)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
Loading…
Reference in New Issue
Block a user