123 lines
3.4 KiB
Plaintext
123 lines
3.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import os\n",
|
|
"import s3fs\n",
|
|
"import warnings\n",
|
|
"from datetime import date, timedelta, datetime\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Import KPI construction functions into the notebook namespace.\n",
"# The script is read from the parent folder; an earlier variant read\n",
"# '0_KPI_functions.py' from the current working directory instead.\n",
"with open('../0_KPI_functions.py') as kpi_source:\n",
"    # The context manager closes the file handle once the code has been read\n",
"    exec(kpi_source.read())\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
|
|
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
|
|
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
|
|
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Build the S3 filesystem client from the endpoint exposed in the environment\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))\n",
"\n",
"# Input folder of company 10; the last expression lists its content\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Load every input table of the company folder and bind each DataFrame to a\n",
"# module-level name equal to its file name (e.g. `campaigns_information`).\n",
"dic_base = ['campaigns_information', 'customerplus_cleaned', 'products_purchased_reduced', 'target_information']\n",
"for nom_base in dic_base:\n",
"    # Reuse BUCKET so the folder path is defined in one place only\n",
"    FILE_PATH_S3_fanta = BUCKET + '/' + nom_base + '.csv'\n",
"    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
"        # globals() creates the variable dynamically from the file name\n",
"        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
"def display_databases(directory_path, file_name, datetime_col = None):\n",
"    \"\"\"\n",
"    Load one CSV file of a company folder from the S3 storage as a DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str or int\n",
"        Company identifier appended to 'Company_' in the path (e.g. '10' or 10).\n",
"    file_name : str\n",
"        Name of the file without the '.csv' extension.\n",
"    datetime_col : optional\n",
"        Value forwarded to the `parse_dates` argument of `pd.read_csv`.\n",
"        When None (default) no date parsing is requested.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Content of the CSV file. The resolved path is printed as a side effect.\n",
"    \"\"\"\n",
"    # str() lets callers pass the company number as an integer as well\n",
"    file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + str(directory_path) + \"/\" + file_name + \".csv\"\n",
"    print(\"File path : \", file_path)\n",
"\n",
"    read_options = {\"sep\": \",\"}\n",
"    if datetime_col is not None:\n",
"        # custom_date_parser is not defined in this cell - presumably provided by 0_KPI_functions.py\n",
"        # NOTE(review): `date_parser` is deprecated since pandas 2.0; consider migrating to `date_format`\n",
"        read_options.update(parse_dates=datetime_col, date_parser=custom_date_parser)\n",
"\n",
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in, **read_options)\n",
"    return df\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|