Exploration tags et target

This commit is contained in:
Antoine JOUBREL 2024-02-25 17:33:24 +00:00
parent fa9c1c790e
commit 716002bdcf

View File

@ -44,8 +44,968 @@
},
{
"cell_type": "markdown",
"id": "c437eaec",
"id": "0294ce71-840e-458b-8ffa-cadabbc6da21",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Debut Travail 25/02"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "30d77451-2df6-4c07-8b15-66e0e990ff03",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"\n",
"# Import cleaning and merge functions\n",
"exec(open('0_KPI_functions.py').read())\n",
"\n",
"# Ignore warning\n",
"warnings.filterwarnings('ignore')\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39",
"metadata": {},
"outputs": [],
"source": [
"def load_dataset_2(directory_path, file_name):\n",
" \"\"\"\n",
" This function loads csv file\n",
" \"\"\"\n",
" file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n",
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\")\n",
"\n",
" # drop na :\n",
" #df = df.dropna(axis=1, thresh=len(df))\n",
" # if identifier in table : delete it\n",
" if 'identifier' in df.columns:\n",
" df = df.drop(columns = 'identifier')\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "35da2e15-1e23-4653-a214-c6ff8f186e85",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6009745</td>\n",
" <td>1372685</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>af</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6011228</td>\n",
" <td>1372685</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>af</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6058950</td>\n",
" <td>1372685</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>af</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>6062404</td>\n",
" <td>1372685</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>af</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>250217</td>\n",
" <td>78785</td>\n",
" <td>NaN</td>\n",
" <td>11035.0</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>471593</th>\n",
" <td>4976621</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>4732462.0</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>471594</th>\n",
" <td>4976636</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>4731717.0</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>471595</th>\n",
" <td>4976637</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>4731674.0</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>471596</th>\n",
" <td>4976645</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>4731549.0</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>471597</th>\n",
" <td>4976666</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>4731118.0</td>\n",
" <td>0</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>471598 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 6009745 1372685 NaN NaN 0 \n",
"1 6011228 1372685 NaN NaN 0 \n",
"2 6058950 1372685 NaN NaN 0 \n",
"3 6062404 1372685 NaN NaN 0 \n",
"4 250217 78785 NaN 11035.0 0 \n",
"... ... ... ... ... ... \n",
"471593 4976621 3 NaN 4732462.0 0 \n",
"471594 4976636 3 NaN 4731717.0 0 \n",
"471595 4976637 3 NaN 4731674.0 0 \n",
"471596 4976645 3 NaN 4731549.0 0 \n",
"471597 4976666 3 NaN 4731118.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 1771 False NaN 2 True ... \n",
"1 1771 False NaN 2 True ... \n",
"2 1771 False NaN 2 True ... \n",
"3 1771 False NaN 2 True ... \n",
"4 1771 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"471593 1771 False NaN 0 True ... \n",
"471594 1771 False NaN 2 True ... \n",
"471595 1771 False NaN 0 True ... \n",
"471596 1771 False NaN 2 True ... \n",
"471597 1771 False NaN 0 True ... \n",
"\n",
" max_price ticket_sum average_price average_purchase_delay \\\n",
"0 NaN 0 NaN NaN \n",
"1 NaN 0 NaN NaN \n",
"2 NaN 0 NaN NaN \n",
"3 NaN 0 NaN NaN \n",
"4 NaN 0 0.0 NaN \n",
"... ... ... ... ... \n",
"471593 NaN 0 NaN NaN \n",
"471594 NaN 0 NaN NaN \n",
"471595 NaN 0 NaN NaN \n",
"471596 NaN 0 NaN NaN \n",
"471597 NaN 0 NaN NaN \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"0 NaN NaN 0.0 \n",
"1 NaN NaN 0.0 \n",
"2 NaN NaN 0.0 \n",
"3 NaN NaN 0.0 \n",
"4 NaN NaN NaN \n",
"... ... ... ... \n",
"471593 NaN NaN 0.0 \n",
"471594 NaN NaN 0.0 \n",
"471595 NaN NaN 0.0 \n",
"471596 NaN NaN 0.0 \n",
"471597 NaN NaN 0.0 \n",
"\n",
" purchase_count first_buying_date country \n",
"0 0 NaN af \n",
"1 0 NaN af \n",
"2 0 NaN af \n",
"3 0 NaN af \n",
"4 0 NaN fr \n",
"... ... ... ... \n",
"471593 0 NaN NaN \n",
"471594 0 NaN NaN \n",
"471595 0 NaN NaN \n",
"471596 0 NaN NaN \n",
"471597 0 NaN NaN \n",
"\n",
"[471598 rows x 22 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"display_databases(\"5\", \"customerplus_cleaned\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "6c8ad8c3-25df-4fe4-9ad0-ee5f9498bc14",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>code</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>101</td>\n",
" <td>hongrie</td>\n",
" <td>hu</td>\n",
" <td>2023-06-13 11:17:40.600622+02:00</td>\n",
" <td>2023-06-13 11:17:40.600622+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>albanie</td>\n",
" <td>al</td>\n",
" <td>2023-06-13 11:17:40.540652+02:00</td>\n",
" <td>2023-06-13 11:17:40.540652+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>antarctique</td>\n",
" <td>aq</td>\n",
" <td>2023-06-13 11:17:40.541315+02:00</td>\n",
" <td>2023-06-13 11:17:40.541315+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12</td>\n",
" <td>autriche</td>\n",
" <td>at</td>\n",
" <td>2023-06-13 11:17:40.546711+02:00</td>\n",
" <td>2023-06-13 11:17:40.546711+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>samoa américaines</td>\n",
" <td>as</td>\n",
" <td>2023-06-13 11:17:40.542569+02:00</td>\n",
" <td>2023-06-13 11:17:40.542569+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>228</td>\n",
" <td>royaume-uni</td>\n",
" <td>gb</td>\n",
" <td>2023-06-13 11:17:40.678023+02:00</td>\n",
" <td>2023-06-13 11:17:40.678023+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>239</th>\n",
" <td>25</td>\n",
" <td>brésil</td>\n",
" <td>br</td>\n",
" <td>2023-06-13 11:17:40.554209+02:00</td>\n",
" <td>2023-06-13 11:17:40.554209+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>10</td>\n",
" <td>argentine</td>\n",
" <td>ar</td>\n",
" <td>2023-06-13 11:17:40.545489+02:00</td>\n",
" <td>2023-06-13 11:17:40.545489+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>241</th>\n",
" <td>203</td>\n",
" <td>espagne</td>\n",
" <td>es</td>\n",
" <td>2023-06-13 11:17:40.662472+02:00</td>\n",
" <td>2023-06-13 11:17:40.662472+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>242</th>\n",
" <td>192</td>\n",
" <td>arabie saoudite</td>\n",
" <td>sa</td>\n",
" <td>2023-06-13 11:17:40.656154+02:00</td>\n",
" <td>2023-06-13 11:17:40.656154+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>243 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id name code created_at \\\n",
"0 101 hongrie hu 2023-06-13 11:17:40.600622+02:00 \n",
"1 2 albanie al 2023-06-13 11:17:40.540652+02:00 \n",
"2 3 antarctique aq 2023-06-13 11:17:40.541315+02:00 \n",
"3 12 autriche at 2023-06-13 11:17:40.546711+02:00 \n",
"4 5 samoa américaines as 2023-06-13 11:17:40.542569+02:00 \n",
".. ... ... ... ... \n",
"238 228 royaume-uni gb 2023-06-13 11:17:40.678023+02:00 \n",
"239 25 brésil br 2023-06-13 11:17:40.554209+02:00 \n",
"240 10 argentine ar 2023-06-13 11:17:40.545489+02:00 \n",
"241 203 espagne es 2023-06-13 11:17:40.662472+02:00 \n",
"242 192 arabie saoudite sa 2023-06-13 11:17:40.656154+02:00 \n",
"\n",
" updated_at \n",
"0 2023-06-13 11:17:40.600622+02:00 \n",
"1 2023-06-13 11:17:40.540652+02:00 \n",
"2 2023-06-13 11:17:40.541315+02:00 \n",
"3 2023-06-13 11:17:40.546711+02:00 \n",
"4 2023-06-13 11:17:40.542569+02:00 \n",
".. ... \n",
"238 2023-06-13 11:17:40.678023+02:00 \n",
"239 2023-06-13 11:17:40.554209+02:00 \n",
"240 2023-06-13 11:17:40.545489+02:00 \n",
"241 2023-06-13 11:17:40.662472+02:00 \n",
"242 2023-06-13 11:17:40.656154+02:00 \n",
"\n",
"[243 rows x 5 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"load_dataset_2(\"7\", \"countries\")"
]
},
{
"cell_type": "markdown",
"id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Base communes au types Musée"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384",
"metadata": {},
"outputs": [],
"source": [
"companies = {'musee' : ['1', '2', '3', '4', '101'],\n",
" 'sport': ['5', '6', '7', '8', '9'],\n",
" 'musique' : ['10', '11', '12', '13', '14']}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbce1124-9a22-4502-a47a-fc3d0e2db70b",
"metadata": {},
"outputs": [],
"source": [
"companies['musee']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5080f66e-f779-410a-876d-b4fe2795e17e",
"metadata": {},
"outputs": [],
"source": [
"for i in companies['musique']:\n",
" BUCKET = \"bdc2324-data/\"+i\n",
" liste_base = []\n",
" for base in fs.ls(BUCKET):\n",
" match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n",
" if match:\n",
" nom_base = match.group(3)\n",
" liste_base.append(nom_base)\n",
" globals()['base_'+i] = liste_base\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "abd477e1-7479-4c88-a5aa-f987af3f5b79",
"metadata": {},
"outputs": [],
"source": [
"# Trouver l'intersection entre les cinq listes\n",
"intersection = set(base_1).intersection(base_2, base_3, base_4, base_101)\n",
"\n",
"# Convertir le résultat en liste si nécessaire\n",
"intersection_liste = list(intersection)\n",
"\n",
"print(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e",
"metadata": {},
"outputs": [],
"source": [
"# Trouver l'intersection entre les cinq listes\n",
"intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n",
"\n",
"# Convertir le résultat en liste si nécessaire\n",
"intersection_liste = list(intersection)\n",
"\n",
"print(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a",
"metadata": {},
"outputs": [],
"source": [
"len(intersection_liste)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0aa8976-1487-4ef5-898e-0d6a88183e67",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "67f02868-b16a-41d5-a0f9-b31ce09278db",
"metadata": {},
"outputs": [],
"source": [
"base_101"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d058b21-a538-4f59-aefb-ef7966f73fdc",
"metadata": {},
"outputs": [],
"source": [
"df1_tags = load_dataset_2(\"1\", \"tags\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa441f99-733c-4675-8676-bed4682d3324",
"metadata": {},
"outputs": [],
"source": [
"df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6767a750-14a4-4c05-903e-d2f07170825b",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus = load_dataset_2(\"1\", \"customersplus\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "125e9145-a815-46fd-bdf4-07589508b259",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c17a6976-792f-474d-bcff-c89396eddb3f",
"metadata": {},
"outputs": [],
"source": [
"df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecfc155a-cb42-46ec-8da5-33fdcd087355",
"metadata": {},
"outputs": [],
"source": [
"len(df1_structure_tag_mappings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "071410b8-950d-4fcc-b2b9-57415253c286",
"metadata": {},
"outputs": [],
"source": [
"df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc",
"metadata": {},
"outputs": [],
"source": [
"?np.sort_values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3",
"metadata": {},
"outputs": [],
"source": [
"def info_colonnes_dataframe(df):\n",
" # Créer une liste pour stocker les informations sur chaque colonne\n",
" infos_colonnes = []\n",
"\n",
" # Parcourir les colonnes du DataFrame\n",
" for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n",
" # Calculer le taux de valeurs manquantes\n",
" taux_na = serie.isna().mean() * 100\n",
"\n",
" # Ajouter les informations à la liste\n",
" infos_colonnes.append({\n",
" 'Nom_colonne': nom_colonne,\n",
" 'Type_colonne': str(serie.dtype),\n",
" 'Taux_NA': taux_na\n",
" })\n",
"\n",
" # Créer une nouvelle DataFrame à partir de la liste d'informations\n",
" df_infos_colonnes = pd.DataFrame(infos_colonnes)\n",
"\n",
" return df_infos_colonnes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"info_colonnes_dataframe(df1_tags)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980",
"metadata": {},
"outputs": [],
"source": [
"info_colonnes_dataframe(df1_structure_tag_mappings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_colwidth', None)\n",
"\n",
"print(df1_tags['name'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a59bf932-5b54-4600-81f5-c55ac93ae510",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"print(df1_tags['name'])"
]
},
{
"cell_type": "markdown",
"id": "76bffba1-5f7e-4308-9224-437ca66148f8",
"metadata": {},
"source": [
"## KPI sur target_type"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "622752ed-b565-4188-86d6-38f1f333fcbe",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n"
]
},
{
"ename": "PermissionError",
"evalue": "Forbidden",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:529\u001b[0m, in \u001b[0;36mS3FileSystem.info\u001b[0;34m(self, path, version_id, refresh)\u001b[0m\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 529\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_s3\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms3\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhead_object\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 530\u001b[0m \u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mversion_id_kw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreq_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 531\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[1;32m 532\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m: out[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 533\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([bucket, key]),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m: out\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 541\u001b[0m }\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:200\u001b[0m, in \u001b[0;36mS3FileSystem._call_s3\u001b[0;34m(self, method, *akwarglist, **kwargs)\u001b[0m\n\u001b[1;32m 198\u001b[0m additional_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_s3_method_kwargs(method, \u001b[38;5;241m*\u001b[39makwarglist,\n\u001b[1;32m 199\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43madditional_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[0;31mClientError\u001b[0m: An error occurred (403) when calling the HeadObject operation: Forbidden",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[74], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdisplay_databases\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m1\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtarget_information\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m<string>:12\u001b[0m, in \u001b[0;36mdisplay_databases\u001b[0;34m(directory_path, file_name, datetime_col)\u001b[0m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1295\u001b[0m, in \u001b[0;36mAbstractFileSystem.open\u001b[0;34m(self, path, mode, block_size, cache_options, compression, **kwargs)\u001b[0m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1294\u001b[0m ac \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mautocommit\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_intrans)\n\u001b[0;32m-> 1295\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_open\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1296\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1297\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1298\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mac\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1300\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1301\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1302\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompression\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compr\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:375\u001b[0m, in \u001b[0;36mS3FileSystem._open\u001b[0;34m(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)\u001b[0m\n\u001b[1;32m 372\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cache_type \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 373\u001b[0m cache_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_cache_type\n\u001b[0;32m--> 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mS3File\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43macl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43macl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 376\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[43m \u001b[49m\u001b[43ms3_additional_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mautocommit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequester_pays\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequester_pays\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:1096\u001b[0m, in \u001b[0;36mS3File.__init__\u001b[0;34m(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ms3_additional_kwargs \u001b[38;5;241m=\u001b[39m s3_additional_kwargs \u001b[38;5;129;01mor\u001b[39;00m {}\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreq_kw \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRequestPayer\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrequester\u001b[39m\u001b[38;5;124m'\u001b[39m} \u001b[38;5;28;01mif\u001b[39;00m requester_pays \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43ms3\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mautocommit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ms3 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfs \u001b[38;5;66;03m# compatibility\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwritable():\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1651\u001b[0m, in \u001b[0;36mAbstractBufferedFile.__init__\u001b[0;34m(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)\u001b[0m\n\u001b[1;32m 1649\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m=\u001b[39m size\n\u001b[1;32m 1650\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1651\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdetails\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msize\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1652\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache \u001b[38;5;241m=\u001b[39m caches[cache_type](\n\u001b[1;32m 1653\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocksize, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fetch_range, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcache_options\n\u001b[1;32m 1654\u001b[0m )\n\u001b[1;32m 1655\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1664\u001b[0m, in \u001b[0;36mAbstractBufferedFile.details\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1661\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 1662\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdetails\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 1663\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1664\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minfo\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1665\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:548\u001b[0m, in \u001b[0;36mS3FileSystem.info\u001b[0;34m(self, path, version_id, refresh)\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m(S3FileSystem, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39minfo(path)\n\u001b[1;32m 547\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 548\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ee\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParamValidationError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFailed to head path \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m (path, e))\n",
"\u001b[0;31mPermissionError\u001b[0m: Forbidden"
]
}
],
"source": [
"display_databases('1', 'target_information')"
]
},
{
"cell_type": "markdown",
"id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901",
"metadata": {},
"source": [
"# Fin travail 25/02"
]
},
{
"cell_type": "markdown",
"id": "c437eaec",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Exemple sur Company 1"
]