From 716002bdcf967b588852320a3588c3dc14a077e6 Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Sun, 25 Feb 2024 17:33:24 +0000 Subject: [PATCH] Exploration tags et target --- Exploration_billet_AJ.ipynb | 962 +++++++++++++++++++++++++++++++++++- 1 file changed, 961 insertions(+), 1 deletion(-) diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb index d697ff5..eb9f6c1 100644 --- a/Exploration_billet_AJ.ipynb +++ b/Exploration_billet_AJ.ipynb @@ -44,8 +44,968 @@ }, { "cell_type": "markdown", - "id": "c437eaec", + "id": "0294ce71-840e-458b-8ffa-cadabbc6da21", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "# Debut Travail 25/02" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "30d77451-2df6-4c07-8b15-66e0e990ff03", "metadata": {}, + "outputs": [], + "source": [ + "# Create filesystem object\n", + "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", + "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", + "\n", + "\n", + "# Import cleaning and merge functions\n", + "exec(open('0_KPI_functions.py').read())\n", + "\n", + "# Ignore warning\n", + "warnings.filterwarnings('ignore')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f1b44d3e-76bb-4860-b9db-a2840db7cf39", + "metadata": {}, + "outputs": [], + "source": [ + "def load_dataset_2(directory_path, file_name):\n", + " \"\"\"\n", + " This function loads csv file\n", + " \"\"\"\n", + " file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n", + " with fs.open(file_path, mode=\"rb\") as file_in:\n", + " df = pd.read_csv(file_in, sep=\",\")\n", + "\n", + " # drop na :\n", + " #df = df.dropna(axis=1, thresh=len(df))\n", + " # if identifier in table : delete it\n", + " if 'identifier' in df.columns:\n", + " df = df.drop(columns = 'identifier')\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "35da2e15-1e23-4653-a214-c6ff8f186e85", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idstreet_idstructure_idmcp_contact_idfidelitytenant_idis_partnerdeleted_atgenderis_email_true...max_priceticket_sumaverage_priceaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountry
060097451372685NaNNaN01771FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNaf
160112281372685NaNNaN01771FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNaf
260589501372685NaNNaN01771FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNaf
360624041372685NaNNaN01771FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNaf
425021778785NaN11035.001771FalseNaN0True...NaN00.0NaNNaNNaNNaN0NaNfr
..................................................................
47159349766213NaN4732462.001771FalseNaN0True...NaN0NaNNaNNaNNaN0.00NaNNaN
47159449766363NaN4731717.001771FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
47159549766373NaN4731674.001771FalseNaN0True...NaN0NaNNaNNaNNaN0.00NaNNaN
47159649766453NaN4731549.001771FalseNaN2True...NaN0NaNNaNNaNNaN0.00NaNNaN
47159749766663NaN4731118.001771FalseNaN0True...NaN0NaNNaNNaNNaN0.00NaNNaN
\n", + "

471598 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + " customer_id street_id structure_id mcp_contact_id fidelity \\\n", + "0 6009745 1372685 NaN NaN 0 \n", + "1 6011228 1372685 NaN NaN 0 \n", + "2 6058950 1372685 NaN NaN 0 \n", + "3 6062404 1372685 NaN NaN 0 \n", + "4 250217 78785 NaN 11035.0 0 \n", + "... ... ... ... ... ... \n", + "471593 4976621 3 NaN 4732462.0 0 \n", + "471594 4976636 3 NaN 4731717.0 0 \n", + "471595 4976637 3 NaN 4731674.0 0 \n", + "471596 4976645 3 NaN 4731549.0 0 \n", + "471597 4976666 3 NaN 4731118.0 0 \n", + "\n", + " tenant_id is_partner deleted_at gender is_email_true ... \\\n", + "0 1771 False NaN 2 True ... \n", + "1 1771 False NaN 2 True ... \n", + "2 1771 False NaN 2 True ... \n", + "3 1771 False NaN 2 True ... \n", + "4 1771 False NaN 0 True ... \n", + "... ... ... ... ... ... ... \n", + "471593 1771 False NaN 0 True ... \n", + "471594 1771 False NaN 2 True ... \n", + "471595 1771 False NaN 0 True ... \n", + "471596 1771 False NaN 2 True ... \n", + "471597 1771 False NaN 0 True ... \n", + "\n", + " max_price ticket_sum average_price average_purchase_delay \\\n", + "0 NaN 0 NaN NaN \n", + "1 NaN 0 NaN NaN \n", + "2 NaN 0 NaN NaN \n", + "3 NaN 0 NaN NaN \n", + "4 NaN 0 0.0 NaN \n", + "... ... ... ... ... \n", + "471593 NaN 0 NaN NaN \n", + "471594 NaN 0 NaN NaN \n", + "471595 NaN 0 NaN NaN \n", + "471596 NaN 0 NaN NaN \n", + "471597 NaN 0 NaN NaN \n", + "\n", + " average_price_basket average_ticket_basket total_price \\\n", + "0 NaN NaN 0.0 \n", + "1 NaN NaN 0.0 \n", + "2 NaN NaN 0.0 \n", + "3 NaN NaN 0.0 \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "471593 NaN NaN 0.0 \n", + "471594 NaN NaN 0.0 \n", + "471595 NaN NaN 0.0 \n", + "471596 NaN NaN 0.0 \n", + "471597 NaN NaN 0.0 \n", + "\n", + " purchase_count first_buying_date country \n", + "0 0 NaN af \n", + "1 0 NaN af \n", + "2 0 NaN af \n", + "3 0 NaN af \n", + "4 0 NaN fr \n", + "... ... ... ... \n", + "471593 0 NaN NaN \n", + "471594 0 NaN NaN \n", + "471595 0 NaN NaN \n", + "471596 0 NaN NaN \n", + "471597 0 NaN NaN \n", + "\n", + "[471598 rows x 22 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display_databases(\"5\", \"customerplus_cleaned\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "6c8ad8c3-25df-4fe4-9ad0-ee5f9498bc14", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamecodecreated_atupdated_at
0101hongriehu2023-06-13 11:17:40.600622+02:002023-06-13 11:17:40.600622+02:00
12albanieal2023-06-13 11:17:40.540652+02:002023-06-13 11:17:40.540652+02:00
23antarctiqueaq2023-06-13 11:17:40.541315+02:002023-06-13 11:17:40.541315+02:00
312autricheat2023-06-13 11:17:40.546711+02:002023-06-13 11:17:40.546711+02:00
45samoa américainesas2023-06-13 11:17:40.542569+02:002023-06-13 11:17:40.542569+02:00
..................
238228royaume-unigb2023-06-13 11:17:40.678023+02:002023-06-13 11:17:40.678023+02:00
23925brésilbr2023-06-13 11:17:40.554209+02:002023-06-13 11:17:40.554209+02:00
24010argentinear2023-06-13 11:17:40.545489+02:002023-06-13 11:17:40.545489+02:00
241203espagnees2023-06-13 11:17:40.662472+02:002023-06-13 11:17:40.662472+02:00
242192arabie saouditesa2023-06-13 11:17:40.656154+02:002023-06-13 11:17:40.656154+02:00
\n", + "

243 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id name code created_at \\\n", + "0 101 hongrie hu 2023-06-13 11:17:40.600622+02:00 \n", + "1 2 albanie al 2023-06-13 11:17:40.540652+02:00 \n", + "2 3 antarctique aq 2023-06-13 11:17:40.541315+02:00 \n", + "3 12 autriche at 2023-06-13 11:17:40.546711+02:00 \n", + "4 5 samoa américaines as 2023-06-13 11:17:40.542569+02:00 \n", + ".. ... ... ... ... \n", + "238 228 royaume-uni gb 2023-06-13 11:17:40.678023+02:00 \n", + "239 25 brésil br 2023-06-13 11:17:40.554209+02:00 \n", + "240 10 argentine ar 2023-06-13 11:17:40.545489+02:00 \n", + "241 203 espagne es 2023-06-13 11:17:40.662472+02:00 \n", + "242 192 arabie saoudite sa 2023-06-13 11:17:40.656154+02:00 \n", + "\n", + " updated_at \n", + "0 2023-06-13 11:17:40.600622+02:00 \n", + "1 2023-06-13 11:17:40.540652+02:00 \n", + "2 2023-06-13 11:17:40.541315+02:00 \n", + "3 2023-06-13 11:17:40.546711+02:00 \n", + "4 2023-06-13 11:17:40.542569+02:00 \n", + ".. ... \n", + "238 2023-06-13 11:17:40.678023+02:00 \n", + "239 2023-06-13 11:17:40.554209+02:00 \n", + "240 2023-06-13 11:17:40.545489+02:00 \n", + "241 2023-06-13 11:17:40.662472+02:00 \n", + "242 2023-06-13 11:17:40.656154+02:00 \n", + "\n", + "[243 rows x 5 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "load_dataset_2(\"7\", \"countries\")" + ] + }, + { + "cell_type": "markdown", + "id": "ca2c8b6a-4965-422e-ba7c-66423a464fc1", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Base communes au types Musée" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8f988fb-5aab-4b57-80d1-e242f7e5b384", + "metadata": {}, + "outputs": [], + "source": [ + "companies = {'musee' : ['1', '2', '3', '4', '101'],\n", + " 'sport': ['5', '6', '7', '8', '9'],\n", + " 'musique' : ['10', '11', '12', '13', '14']}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbce1124-9a22-4502-a47a-fc3d0e2db70b", + "metadata": {}, + "outputs": [], + "source": [ + "companies['musee']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5080f66e-f779-410a-876d-b4fe2795e17e", + "metadata": {}, + "outputs": [], + "source": [ + "for i in companies['musique']:\n", + " BUCKET = \"bdc2324-data/\"+i\n", + " liste_base = []\n", + " for base in fs.ls(BUCKET):\n", + " match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', base)\n", + " if match:\n", + " nom_base = match.group(3)\n", + " liste_base.append(nom_base)\n", + " globals()['base_'+i] = liste_base\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abd477e1-7479-4c88-a5aa-f987af3f5b79", + "metadata": {}, + "outputs": [], + "source": [ + "# Trouver l'intersection entre les cinq listes\n", + "intersection = set(base_1).intersection(base_2, base_3, base_4, base_101)\n", + "\n", + "# Convertir le résultat en liste si nécessaire\n", + "intersection_liste = list(intersection)\n", + "\n", + "print(intersection_liste)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d93888f-a511-4ee5-8bc3-d5173a7f119e", + "metadata": {}, + "outputs": [], + "source": [ + "# Trouver l'intersection entre les cinq listes\n", + "intersection = set(base_10).intersection(base_12, base_13, base_14, base_11)\n", + "\n", + "# Convertir le résultat en liste si nécessaire\n", + "intersection_liste = list(intersection)\n", + "\n", + "print(intersection_liste)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10e89669-42bb-4652-a4bc-1a3d1caf4d1a", + "metadata": {}, + "outputs": [], + "source": [ + "len(intersection_liste)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0aa8976-1487-4ef5-898e-0d6a88183e67", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67f02868-b16a-41d5-a0f9-b31ce09278db", + "metadata": {}, + "outputs": [], + "source": [ + "base_101" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d058b21-a538-4f59-aefb-ef7966f73fdc", + "metadata": {}, + "outputs": [], + "source": [ + "df1_tags = load_dataset_2(\"1\", \"tags\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa441f99-733c-4675-8676-bed4682d3324", + "metadata": {}, + "outputs": [], + "source": [ + "df1_structure_tag_mappings = load_dataset_2(\"1\", 'structure_tag_mappings')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6767a750-14a4-4c05-903e-d2f07170825b", + "metadata": {}, + "outputs": [], + "source": [ + "df1_customersplus = load_dataset_2(\"1\", \"customersplus\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "125e9145-a815-46fd-bdf4-07589508b259", + "metadata": {}, + "outputs": [], + "source": [ + "df1_customersplus.groupby('structure_id')['id'].count().reset_index().sort_values('id', ascending=False).head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c17a6976-792f-474d-bcff-c89396eddb3f", + "metadata": {}, + "outputs": [], + "source": [ + "df1_customersplus['structure_id'].isna().sum() / len(df1_customersplus['structure_id'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ecfc155a-cb42-46ec-8da5-33fdcd087355", + "metadata": {}, + "outputs": [], + "source": [ + "len(df1_structure_tag_mappings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "071410b8-950d-4fcc-b2b9-57415253c286", + "metadata": {}, + "outputs": [], + "source": [ + "df1_structure_tag_mappings.groupby('tag_id')['structure_id'].count().reset_index().sort_values('structure_id', ascending=False).head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f48d27a9-14e4-4bb9-a60a-73e9438b58fc", + "metadata": {}, + "outputs": [], + "source": [ + "?np.sort_values()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14eaa0ea-02cc-430b-ab9b-38e6637810c3", + "metadata": {}, + "outputs": [], + "source": [ + "def info_colonnes_dataframe(df):\n", + " # Créer une liste pour stocker les informations sur chaque colonne\n", + " infos_colonnes = []\n", + "\n", + " # Parcourir les colonnes du DataFrame\n", + " for nom_colonne, serie in df.items(): # Utiliser items() au lieu de iteritems()\n", + " # Calculer le taux de valeurs manquantes\n", + " taux_na = serie.isna().mean() * 100\n", + "\n", + " # Ajouter les informations à la liste\n", + " infos_colonnes.append({\n", + " 'Nom_colonne': nom_colonne,\n", + " 'Type_colonne': str(serie.dtype),\n", + " 'Taux_NA': taux_na\n", + " })\n", + "\n", + " # Créer une nouvelle DataFrame à partir de la liste d'informations\n", + " df_infos_colonnes = pd.DataFrame(infos_colonnes)\n", + "\n", + " return df_infos_colonnes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b031c32-d4c8-42a5-9a71-a7810f9bf8d8", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "info_colonnes_dataframe(df1_tags)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a87f27-c4d4-4832-ac20-0c3c54aa4980", + "metadata": {}, + "outputs": [], + "source": [ + "info_colonnes_dataframe(df1_structure_tag_mappings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa5c65a8-2f74-4f3f-85fc-9ac91e0bb361", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_colwidth', None)\n", + "\n", + "print(df1_tags['name'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a59bf932-5b54-4600-81f5-c55ac93ae510", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_rows', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4ab298e-2cae-4865-9f00-4caff5f75ea1", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(df1_tags['name'])" + ] + }, + { + "cell_type": "markdown", + "id": "76bffba1-5f7e-4308-9224-437ca66148f8", + "metadata": {}, + "source": [ + "## KPI sur target_type" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "622752ed-b565-4188-86d6-38f1f333fcbe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n" + ] + }, + { + "ename": "PermissionError", + "evalue": "Forbidden", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:529\u001b[0m, in \u001b[0;36mS3FileSystem.info\u001b[0;34m(self, path, version_id, refresh)\u001b[0m\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 529\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_s3\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms3\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhead_object\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 530\u001b[0m \u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mversion_id_kw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion_id\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreq_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 531\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[1;32m 532\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m: out[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 533\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mKey\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([bucket, key]),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m: out\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 541\u001b[0m }\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:200\u001b[0m, in \u001b[0;36mS3FileSystem._call_s3\u001b[0;34m(self, method, *akwarglist, **kwargs)\u001b[0m\n\u001b[1;32m 198\u001b[0m additional_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_s3_method_kwargs(method, \u001b[38;5;241m*\u001b[39makwarglist,\n\u001b[1;32m 199\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43madditional_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mClientError\u001b[0m: An error occurred (403) when calling the HeadObject operation: Forbidden", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[74], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdisplay_databases\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m1\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtarget_information\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m:12\u001b[0m, in \u001b[0;36mdisplay_databases\u001b[0;34m(directory_path, file_name, datetime_col)\u001b[0m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1295\u001b[0m, in \u001b[0;36mAbstractFileSystem.open\u001b[0;34m(self, path, mode, block_size, cache_options, compression, **kwargs)\u001b[0m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1294\u001b[0m ac \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mautocommit\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_intrans)\n\u001b[0;32m-> 1295\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_open\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1296\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1297\u001b[0m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1298\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mac\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1300\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1301\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1302\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m compression \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1304\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompression\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compr\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:375\u001b[0m, in \u001b[0;36mS3FileSystem._open\u001b[0;34m(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, **kwargs)\u001b[0m\n\u001b[1;32m 372\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cache_type \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 373\u001b[0m cache_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_cache_type\n\u001b[0;32m--> 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mS3File\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43macl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43macl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 376\u001b[0m \u001b[43m \u001b[49m\u001b[43mversion_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[43m \u001b[49m\u001b[43ms3_additional_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mautocommit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequester_pays\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequester_pays\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:1096\u001b[0m, in \u001b[0;36mS3File.__init__\u001b[0;34m(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays)\u001b[0m\n\u001b[1;32m 1094\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ms3_additional_kwargs \u001b[38;5;241m=\u001b[39m s3_additional_kwargs \u001b[38;5;129;01mor\u001b[39;00m {}\n\u001b[1;32m 1095\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreq_kw \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRequestPayer\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrequester\u001b[39m\u001b[38;5;124m'\u001b[39m} \u001b[38;5;28;01mif\u001b[39;00m requester_pays \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m-> 1096\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43ms3\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mautocommit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mautocommit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39ms3 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfs \u001b[38;5;66;03m# compatibility\u001b[39;00m\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwritable():\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1651\u001b[0m, in \u001b[0;36mAbstractBufferedFile.__init__\u001b[0;34m(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)\u001b[0m\n\u001b[1;32m 1649\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m=\u001b[39m size\n\u001b[1;32m 1650\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1651\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdetails\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msize\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 1652\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache \u001b[38;5;241m=\u001b[39m caches[cache_type](\n\u001b[1;32m 1653\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocksize, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fetch_range, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msize, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcache_options\n\u001b[1;32m 1654\u001b[0m )\n\u001b[1;32m 1655\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1664\u001b[0m, in \u001b[0;36mAbstractBufferedFile.details\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1661\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 1662\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdetails\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 1663\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1664\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minfo\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1665\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_details\n", + "File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:548\u001b[0m, in \u001b[0;36mS3FileSystem.info\u001b[0;34m(self, path, version_id, refresh)\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m(S3FileSystem, \u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39minfo(path)\n\u001b[1;32m 547\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 548\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ee\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ParamValidationError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFailed to head path \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m (path, e))\n", + "\u001b[0;31mPermissionError\u001b[0m: Forbidden" + ] + } + ], + "source": [ + "display_databases('1', 'target_information')" + ] + }, + { + "cell_type": "markdown", + "id": "1ede9eaa-7f0a-4856-9349-b2747d6a4901", + "metadata": {}, + "source": [ + "# Fin travail 25/02" + ] + }, + { + "cell_type": "markdown", + "id": "c437eaec", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "# Exemple sur Company 1" ]