{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "aa915888-cede-4eb0-8a26-7df573d29a3e", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import s3fs\n", "import warnings\n", "from datetime import date, timedelta, datetime\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 3, "id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e", "metadata": {}, "outputs": [], "source": [ "# Import KPI construction functions\n", "#exec(open('0_KPI_functions.py').read())\n", "exec(open('../0_KPI_functions.py').read())\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "9c1737a2-bad8-4266-8dec-452085d8cfe7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n", " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", "\n", "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n", "fs.ls(BUCKET)" ] }, { "cell_type": "code", "execution_count": 5, "id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2", "metadata": {}, "outputs": [], "source": [ "dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n", "for nom_base in dic_base:\n", " FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n", " with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "40b705eb-fd18-436b-b150-61611a3c6a84", "metadata": {}, "outputs": [], "source": [ "\n", "def display_databases(directory_path, file_name, datetime_col = None):\n", " \"\"\"\n", " This function returns the file from s3 storage \n", " \"\"\"\n", " file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n", " print(\"File path : \", file_path)\n", " with fs.open(file_path, mode=\"rb\") as file_in:\n", " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n", " return df \n" ] }, { "cell_type": "code", "execution_count": null, "id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }