From 88b6d3fe1b9b16c45bb096f4c3678c0b469b624b Mon Sep 17 00:00:00 2001 From: frodrigue-ensae Date: Sun, 11 Feb 2024 13:17:32 +0000 Subject: [PATCH] traitement --- Traitement_Fanta.ipynb | 129 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 8 deletions(-) diff --git a/Traitement_Fanta.ipynb b/Traitement_Fanta.ipynb index 5e3529d..651faaa 100644 --- a/Traitement_Fanta.ipynb +++ b/Traitement_Fanta.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "id": "b6035982-9ff4-4013-9792-2d50e10db3d1", "metadata": {}, "outputs": [ @@ -66,7 +66,7 @@ " 'bdc2324-data/1/1type_ofs.csv']" ] }, - "execution_count": 2, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "b86c935d-124f-453f-80dd-83ea6770d09c", "metadata": {}, "outputs": [], @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd", "metadata": {}, "outputs": [ @@ -102,7 +102,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_447/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_438/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n" ] } @@ -117,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "2a6b5e22-3370-457f-83b7-dd1e13663229", "metadata": {}, "outputs": [ @@ -127,7 +127,7 @@ "'bdc2324-data/1/1type_ofs.csv'" ] }, - "execution_count": 5, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1689,10 +1689,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "d22a0d75-53c5-4b54-9060-c9e7c307fb13", "metadata": {}, "outputs": [], + "source": [ + "BUCKET = \"bdc2324-data\"\n", + "directory_path = '2'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7c229dad-6ebd-4f43-99f1-fb330dc29466", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bdc2324-data/2/2campaign_stats.csv',\n", + " 'bdc2324-data/2/2campaigns.csv',\n", + " 'bdc2324-data/2/2categories.csv',\n", + " 'bdc2324-data/2/2contribution_sites.csv',\n", + " 'bdc2324-data/2/2contributions.csv',\n", + " 'bdc2324-data/2/2countries.csv',\n", + " 'bdc2324-data/2/2currencies.csv',\n", + " 'bdc2324-data/2/2customer_target_mappings.csv',\n", + " 'bdc2324-data/2/2customersplus.csv',\n", + " 'bdc2324-data/2/2event_types.csv',\n", + " 'bdc2324-data/2/2events.csv',\n", + " 'bdc2324-data/2/2facilities.csv',\n", + " 'bdc2324-data/2/2link_stats.csv',\n", + " 'bdc2324-data/2/2pricing_formulas.csv',\n", + " 'bdc2324-data/2/2product_packs.csv',\n", + " 'bdc2324-data/2/2products.csv',\n", + " 'bdc2324-data/2/2products_groups.csv',\n", + " 'bdc2324-data/2/2purchases.csv',\n", + " 'bdc2324-data/2/2representation_category_capacities.csv',\n", + " 'bdc2324-data/2/2representations.csv',\n", + " 'bdc2324-data/2/2seasons.csv',\n", + " 'bdc2324-data/2/2structure_tag_mappings.csv',\n", + " 'bdc2324-data/2/2suppliers.csv',\n", + " 'bdc2324-data/2/2tags.csv',\n", + " 'bdc2324-data/2/2target_types.csv',\n", + " 'bdc2324-data/2/2targets.csv',\n", + " 'bdc2324-data/2/2tickets.csv']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "BUCKET = \"bdc2324-data/2\"\n", + "fs.ls(BUCKET)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "df3d3548-3d76-4f07-afa1-e240932bc1c7", + "metadata": {}, + "outputs": [], + "source": [ + "dic_base_ent2=['campaign_stats','campaigns','categories','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "90f8d5fc-43f3-4f36-b8cc-89a41785f032", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_438/673681459.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n" + ] + } + ], + "source": [ + "dic_base_ent2=['campaign_stats','campaigns','categories','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']\n", + "for nom_base in dic_base_ent2:\n", + " FILE_PATH_S3_fanta = 'bdc2324-data/2/2' + nom_base + '.csv'\n", + " with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3e39a584-e02b-41b2-831c-33b920e298e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "27" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dic_base_ent2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06759646-9419-4841-b12f-bbfceb417f3a", + "metadata": {}, + "outputs": [], "source": [] } ],