diff --git a/Spectacle/Stat_desc.ipynb b/Spectacle/Stat_desc.ipynb new file mode 100644 index 0000000..42b5ca5 --- /dev/null +++ b/Spectacle/Stat_desc.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "aa915888-cede-4eb0-8a26-7df573d29a3e", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "import s3fs\n", + "import warnings\n", + "from datetime import date, timedelta, datetime\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e", + "metadata": {}, + "outputs": [], + "source": [ + "# Import KPI construction functions\n", + "#exec(open('0_KPI_functions.py').read())\n", + "exec(open('../0_KPI_functions.py').read())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9c1737a2-bad8-4266-8dec-452085d8cfe7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n", + " 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create filesystem object\n", + "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", + "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n", + "\n", + "BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n", + "fs.ls(BUCKET)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2", + "metadata": {}, + "outputs": [], + "source": [ + "dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n", + "for nom_base in dic_base:\n", + " FILE_PATH_S3_fanta = 
'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n", + " with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n", + " globals()[nom_base] = pd.read_csv(file_in, sep=\",\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "40b705eb-fd18-436b-b150-61611a3c6a84", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def display_databases(directory_path, file_name, datetime_col = None):\n", + " \"\"\"\n", + " Print the S3 path of file_name.csv for company directory_path, read that CSV from S3 storage and return it as a pandas DataFrame (datetime_col columns are parsed as dates with custom_date_parser).\n", + " \"\"\"\n", + " file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n", + " print(\"File path : \", file_path)\n", + " with fs.open(file_path, mode=\"rb\") as file_in:\n", + " df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n", + " return df \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e56aa16f-a167-4bff-9f8b-f764d1f28ebd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}