traitement

This commit is contained in:
Fanta RODRIGUE 2024-02-11 13:17:32 +00:00
parent 9f433d2c12
commit 88b6d3fe1b

View File

@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"id": "b6035982-9ff4-4013-9792-2d50e10db3d1",
"metadata": {},
"outputs": [
@ -66,7 +66,7 @@
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -84,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"id": "b86c935d-124f-453f-80dd-83ea6770d09c",
"metadata": {},
"outputs": [],
@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"id": "f6d0b27c-0ecd-406b-b042-6c3802dd68fd",
"metadata": {},
"outputs": [
@ -102,7 +102,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_447/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"/tmp/ipykernel_438/1008972637.py:5: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
]
}
@ -117,7 +117,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"id": "2a6b5e22-3370-457f-83b7-dd1e13663229",
"metadata": {},
"outputs": [
@ -127,7 +127,7 @@
"'bdc2324-data/1/1type_ofs.csv'"
]
},
"execution_count": 5,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -1689,10 +1689,123 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"id": "d22a0d75-53c5-4b54-9060-c9e7c307fb13",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"bdc2324-data\"\n",
"directory_path = '2'"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7c229dad-6ebd-4f43-99f1-fb330dc29466",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/2/2campaign_stats.csv',\n",
" 'bdc2324-data/2/2campaigns.csv',\n",
" 'bdc2324-data/2/2categories.csv',\n",
" 'bdc2324-data/2/2contribution_sites.csv',\n",
" 'bdc2324-data/2/2contributions.csv',\n",
" 'bdc2324-data/2/2countries.csv',\n",
" 'bdc2324-data/2/2currencies.csv',\n",
" 'bdc2324-data/2/2customer_target_mappings.csv',\n",
" 'bdc2324-data/2/2customersplus.csv',\n",
" 'bdc2324-data/2/2event_types.csv',\n",
" 'bdc2324-data/2/2events.csv',\n",
" 'bdc2324-data/2/2facilities.csv',\n",
" 'bdc2324-data/2/2link_stats.csv',\n",
" 'bdc2324-data/2/2pricing_formulas.csv',\n",
" 'bdc2324-data/2/2product_packs.csv',\n",
" 'bdc2324-data/2/2products.csv',\n",
" 'bdc2324-data/2/2products_groups.csv',\n",
" 'bdc2324-data/2/2purchases.csv',\n",
" 'bdc2324-data/2/2representation_category_capacities.csv',\n",
" 'bdc2324-data/2/2representations.csv',\n",
" 'bdc2324-data/2/2seasons.csv',\n",
" 'bdc2324-data/2/2structure_tag_mappings.csv',\n",
" 'bdc2324-data/2/2suppliers.csv',\n",
" 'bdc2324-data/2/2tags.csv',\n",
" 'bdc2324-data/2/2target_types.csv',\n",
" 'bdc2324-data/2/2targets.csv',\n",
" 'bdc2324-data/2/2tickets.csv']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"BUCKET = \"bdc2324-data/2\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "df3d3548-3d76-4f07-afa1-e240932bc1c7",
"metadata": {},
"outputs": [],
"source": [
"dic_base_ent2=['campaign_stats','campaigns','categories','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "90f8d5fc-43f3-4f36-b8cc-89a41785f032",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_438/673681459.py:5: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
"source": [
"dic_base_ent2=['campaign_stats','campaigns','categories','contribution_sites','contributions','countries','currencies','customer_target_mappings','customersplus','event_types','events','facilities','link_stats','pricing_formulas','product_packs','products','products_groups','purchases','representation_category_capacities','representations','seasons','structure_tag_mappings','suppliers','tags','target_types','targets','tickets']\n",
"for nom_base in dic_base_ent2:\n",
" FILE_PATH_S3_fanta = 'bdc2324-data/2/2' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "3e39a584-e02b-41b2-831c-33b920e298e9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"27"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dic_base_ent2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "06759646-9419-4841-b12f-bbfceb417f3a",
"metadata": {},
"outputs": [],
"source": []
}
],