diff --git a/data_exploration/aum_analysis.ipynb b/data_exploration/aum_analysis.ipynb
new file mode 100644
index 0000000..d7969a9
--- /dev/null
+++ b/data_exploration/aum_analysis.ipynb
@@ -0,0 +1,268 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# AUM Analysis\n",
+    "\n",
+    "This notebook sums the **Value - AUM €** by **Product - Asset Type** and by **Product - Fund** from the AUM sample data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# openpyxl is the engine named by the (currently commented-out) read_excel call below.\n",
+    "# %pip installs into this kernel's environment; the pin keeps re-runs reproducible.\n",
+    "%pip install -q openpyxl==3.1.5\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import plotly.express as px\n",
+    "import s3fs\n",
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# S3-compatible endpoint that hosts the project data.\n",
+    "S3_ENDPOINT_URL = 'https://minio-simple.lab.groupe-genes.fr'\n",
+    "\n",
+    "# Credentials are read from the environment so no secret is stored in the notebook.\n",
+    "fs = s3fs.S3FileSystem(\n",
+    "    client_kwargs={'endpoint_url': S3_ENDPOINT_URL},\n",
+    "    key=os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
+    "    secret=os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
+    "    # A session token only exists for temporary credentials, so it is optional;\n",
+    "    # a missing variable yields None, which is also s3fs's default for `token`.\n",
+    "    token=os.environ.get(\"AWS_SESSION_TOKEN\"),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#with fs.open('projet-bdc-data//carmignac/Flows ENSAE V2 -20251105.csv', 'rb') as f:\n",
+    " #flows = pd.read_csv(f, sep=\";\")\n",
+    "\n",
+    "with fs.open('projet-bdc-data//carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
+    "    # low_memory=False parses each column in a single pass, so dtype inference no\n",
+    "    # longer varies by chunk (pandas raised a DtypeWarning for columns 0-3 on this file).\n",
+    "    stocks = pd.read_csv(f, sep=\";\", low_memory=False)\n",
+    "\n",
+    "#with fs.open('projet-bdc-data/carmignac/Monthly AUM and NAV since 2010.xlsx', 'rb') as f:#\n",
+    " #nav_raw = pd.read_excel(f, header=None, engine=\"openpyxl\")\n",
+    "\n",
+    "#nav = nav_raw[0].str.split(\",\", expand=True)\n",
+    "#nav.columns = nav.iloc[0]\n",
+    "#nav = nav[1:].reset_index(drop=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Date conversion done.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# NOTE(review): no explicit format/dayfirst is passed -- confirm the CSV's date layout.\n",
+    "raw_dates = stocks[\"Centralisation Date\"]\n",
+    "parsed_dates = pd.to_datetime(raw_dates, errors=\"coerce\")\n",
+    "#flows[\"Centralisation Date\"] = pd.to_datetime(flows[\"Centralisation Date\"], errors=\"coerce\")\n",
+    "#nav[\"NavDate\"] = pd.to_datetime(nav[\"NavDate\"], format=\"%d/%m/%Y\", errors=\"coerce\")\n",
+    "\n",
+    "# errors=\"coerce\" turns unparseable values into NaT; count the newly created ones\n",
+    "# (values that were present before parsing) instead of hiding them.\n",
+    "n_unparsed = int((parsed_dates.isna() & raw_dates.notna()).sum())\n",
+    "stocks[\"Centralisation Date\"] = parsed_dates\n",
+    "\n",
+    "print(\"Date conversion done.\")\n",
+    "if n_unparsed:\n",
+    "    print(f\"Warning: {n_unparsed} 'Centralisation Date' value(s) could not be parsed and are now NaT.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sum of AUM by Product - Asset Type"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AUM (€) by Product - Asset Type:\n",
+      "Product - Asset Type\n",
+      "Diversified 2.249487e+12\n",
+      "Fixed Income 1.901982e+12\n",
+      "Equity 9.811712e+11\n",
+      "Alternative 1.208047e+11\n",
+      "NaN 1.786480e+10\n",
+      "Private Assets 2.205183e+09\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Product - Asset TypeTotal AUM (€)
0Diversified2.249487e+12
1Fixed Income1.901982e+12
2Equity9.811712e+11
3Alternative1.208047e+11
4NaN1.786480e+10
5Private Assets2.205183e+09
\n",
+       "
+      ],
+      "text/plain": [
+       " Product - Asset Type Total AUM (€)\n",
+       "0 Diversified 2.249487e+12\n",
+       "1 Fixed Income 1.901982e+12\n",
+       "2 Equity 9.811712e+11\n",
+       "3 Alternative 1.208047e+11\n",
+       "4 NaN 1.786480e+10\n",
+       "5 Private Assets 2.205183e+09"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def sum_aum_by(frame, group_col, value_col='Value - AUM €'):\n",
+    "    \"\"\"Return the total of `value_col` for each `group_col` value, largest first.\n",
+    "\n",
+    "    Rows whose `group_col` is missing are kept as their own NaN group\n",
+    "    (dropna=False), so the grouped totals still add up to the column total.\n",
+    "    \"\"\"\n",
+    "    totals = frame.groupby(group_col, dropna=False)[value_col].sum()\n",
+    "    return totals.sort_values(ascending=False)\n",
+    "\n",
+    "\n",
+    "# Sum Value - AUM € per Product - Asset Type\n",
+    "# NOTE(review): the totals span every row of `stocks`, i.e. every Centralisation Date;\n",
+    "# if the file holds several snapshots these are cumulative figures, not a\n",
+    "# point-in-time AUM -- confirm this is intended.\n",
+    "aum_by_asset_type = sum_aum_by(stocks, 'Product - Asset Type')\n",
+    "\n",
+    "print(\"AUM (€) by Product - Asset Type:\")\n",
+    "print(aum_by_asset_type.to_string())\n",
+    "\n",
+    "# Display as DataFrame for nicer formatting\n",
+    "aum_by_asset_type_df = aum_by_asset_type.rename('Total AUM (€)').reset_index()\n",
+    "aum_by_asset_type_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Sum of AUM by Product - Fund"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sum Value - AUM € per Product - Fund (sum_aum_by is defined in the asset-type cell above)\n",
+    "aum_by_fund = sum_aum_by(stocks, 'Product - Fund')\n",
+    "\n",
+    "print(\"AUM (€) by Product - Fund:\")\n",
+    "print(aum_by_fund.to_string())\n",
+    "\n",
+    "# Display as DataFrame for nicer formatting\n",
+    "aum_by_fund_df = aum_by_fund.rename('Total AUM (€)').reset_index()\n",
+    "aum_by_fund_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}