Project_Carmignac/data/explore.ipynb

1347 lines
51 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "bd938e6e",
"metadata": {},
"source": [
"**Short notebook to test connectivity with S3 services and explore the data**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "127753ac",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ae3c64fe",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
"fs = s3fs.S3FileSystem(\n",
" client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n",
" key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n",
" secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n",
" token = os.environ[\"AWS_SESSION_TOKEN\"])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "84b9ac42",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def sample_by_blocks(df, block_size=10, num_blocks=10, random_state=None):\n",
" \"\"\"Sample num_blocks blocks of block_size consecutive rows (no overlapping blocks).\"\"\"\n",
" n = len(df)\n",
" max_start = n - block_size\n",
" if max_start < 0:\n",
" raise ValueError(f\"DataFrame has {n} rows, need at least {block_size}\")\n",
" if max_start + 1 < num_blocks:\n",
" raise ValueError(f\"Not enough room for {num_blocks} non-overlapping blocks (need at least {num_blocks * block_size} rows)\")\n",
" rng = np.random.default_rng(random_state)\n",
" chosen_starts = rng.choice(max_start + 1, size=num_blocks, replace=False)\n",
" chosen_starts.sort() # blocks in order of position in original df\n",
" indices = np.concatenate([np.arange(s, s + block_size) for s in chosen_starts])\n",
" return df.iloc[indices].reset_index(drop=True)\n",
"\n",
"# sample_df = sample_by_blocks(df, block_size=10, num_blocks=10, random_state=42)"
]
},
{
"cell_type": "markdown",
"id": "7f7d45bb",
"metadata": {},
"source": [
"### OG AUM"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "83472648",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_175445/2279824029.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" aum = pd.read_csv(f, sep =\";\")\n"
]
}
],
"source": [
"with fs.open('s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
" aum = pd.read_csv(f, sep =\";\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0b84ede5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2716081</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>NaN</td>\n",
" <td>Infotech</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Infotech</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2015-06-30</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2716082</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>NaN</td>\n",
" <td>Infotech</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Infotech</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2016-03-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2716092</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>NaN</td>\n",
" <td>Infotech</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Infotech</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2017-05-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2716093</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>NaN</td>\n",
" <td>Infotech</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Infotech</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2019-02-28</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2716094</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>NaN</td>\n",
" <td>Infotech</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Infotech</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2019-03-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3652177</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Equity</td>\n",
" <td>Investissement Latitude</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement Latitude</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2018-05-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3652178</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Equity</td>\n",
" <td>Investissement Latitude</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement Latitude</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2018-06-30</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3652179</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Equity</td>\n",
" <td>Investissement Latitude</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement Latitude</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2018-12-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3652180</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Equity</td>\n",
" <td>Investissement Latitude</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement Latitude</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2019-08-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3652181</th>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Equity</td>\n",
" <td>Investissement Latitude</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement Latitude</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2020-01-31</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>640 rows × 18 columns</p>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"2716081 L134 15292.0 15292.0 \n",
"2716082 L134 15292.0 15292.0 \n",
"2716092 L134 15292.0 15292.0 \n",
"2716093 L134 15292.0 15292.0 \n",
"2716094 L134 15292.0 15292.0 \n",
"... ... ... ... \n",
"3652177 L134 15292.0 15292.0 \n",
"3652178 L134 15292.0 15292.0 \n",
"3652179 L134 15292.0 15292.0 \n",
"3652180 L134 15292.0 15292.0 \n",
"3652181 L134 15292.0 15292.0 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"2716081 11215 Luxembourg \n",
"2716082 11215 Luxembourg \n",
"2716092 11215 Luxembourg \n",
"2716093 11215 Luxembourg \n",
"2716094 11215 Luxembourg \n",
"... ... ... \n",
"3652177 11215 Luxembourg \n",
"3652178 11215 Luxembourg \n",
"3652179 11215 Luxembourg \n",
"3652180 11215 Luxembourg \n",
"3652181 11215 Luxembourg \n",
"\n",
" RegistrarAccount - Country Product - Asset Type \\\n",
"2716081 Luxembourg NaN \n",
"2716082 Luxembourg NaN \n",
"2716092 Luxembourg NaN \n",
"2716093 Luxembourg NaN \n",
"2716094 Luxembourg NaN \n",
"... ... ... \n",
"3652177 Luxembourg Equity \n",
"3652178 Luxembourg Equity \n",
"3652179 Luxembourg Equity \n",
"3652180 Luxembourg Equity \n",
"3652181 Luxembourg Equity \n",
"\n",
" Product - Strategy Product - Legal Status Product - Is Dedie ? \\\n",
"2716081 Infotech SICAV NO \n",
"2716082 Infotech SICAV NO \n",
"2716092 Infotech SICAV NO \n",
"2716093 Infotech SICAV NO \n",
"2716094 Infotech SICAV NO \n",
"... ... ... ... \n",
"3652177 Investissement Latitude FCP NO \n",
"3652178 Investissement Latitude FCP NO \n",
"3652179 Investissement Latitude FCP NO \n",
"3652180 Investissement Latitude FCP NO \n",
"3652181 Investissement Latitude FCP NO \n",
"\n",
" Product - Fund Product - Shareclass Type \\\n",
"2716081 Carmignac Portfolio Infotech A \n",
"2716082 Carmignac Portfolio Infotech A \n",
"2716092 Carmignac Portfolio Infotech A \n",
"2716093 Carmignac Portfolio Infotech A \n",
"2716094 Carmignac Portfolio Infotech A \n",
"... ... ... \n",
"3652177 Carmignac Investissement Latitude A \n",
"3652178 Carmignac Investissement Latitude A \n",
"3652179 Carmignac Investissement Latitude A \n",
"3652180 Carmignac Investissement Latitude A \n",
"3652181 Carmignac Investissement Latitude A \n",
"\n",
" Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
"2716081 EUR LU0109929157 2015-06-30 \n",
"2716082 EUR LU0109929157 2016-03-31 \n",
"2716092 EUR LU0109929157 2017-05-31 \n",
"2716093 EUR LU0109929157 2019-02-28 \n",
"2716094 EUR LU0109929157 2019-03-31 \n",
"... ... ... ... \n",
"3652177 EUR FR0010147603 2018-05-31 \n",
"3652178 EUR FR0010147603 2018-06-30 \n",
"3652179 EUR FR0010147603 2018-12-31 \n",
"3652180 EUR FR0010147603 2019-08-31 \n",
"3652181 EUR FR0010147603 2020-01-31 \n",
"\n",
" Quantity - AUM Value - AUM CCY Value - AUM € \n",
"2716081 0.0 0.0 0.0 \n",
"2716082 0.0 0.0 0.0 \n",
"2716092 0.0 0.0 0.0 \n",
"2716093 0.0 0.0 0.0 \n",
"2716094 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"3652177 0.0 0.0 0.0 \n",
"3652178 0.0 0.0 0.0 \n",
"3652179 0.0 0.0 0.0 \n",
"3652180 0.0 0.0 0.0 \n",
"3652181 0.0 0.0 0.0 \n",
"\n",
"[640 rows x 18 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aum[aum[\"Registrar Account - ID\"] == 11215]"
]
},
{
"cell_type": "markdown",
"id": "6f40c922",
"metadata": {},
"source": [
"### Repaired AUM"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b6edd4fd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_175445/204804706.py:2: DtypeWarning: Columns (2,3,4,5) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df_repaired = pd.read_csv(f, sep =\",\")\n"
]
}
],
"source": [
"with fs.open('s3://projet-bdc-carmignac-g3/AUM_repaired.csv', 'rb') as f:\n",
" df_repaired = pd.read_csv(f, sep =\",\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2521a2a6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0.1</th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>...</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" <th>repair_flag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2995412</th>\n",
" <td>2995412</td>\n",
" <td>2716081</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>NAN</td>\n",
" <td>INFOTECH</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO INFOTECH</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2015-06-30</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2995413</th>\n",
" <td>2995413</td>\n",
" <td>2716082</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>NAN</td>\n",
" <td>INFOTECH</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO INFOTECH</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2016-03-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2995423</th>\n",
" <td>2995423</td>\n",
" <td>2716092</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>NAN</td>\n",
" <td>INFOTECH</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO INFOTECH</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2017-05-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2995424</th>\n",
" <td>2995424</td>\n",
" <td>2716093</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>NAN</td>\n",
" <td>INFOTECH</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO INFOTECH</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2019-02-28</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2995425</th>\n",
" <td>2995425</td>\n",
" <td>2716094</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>NAN</td>\n",
" <td>INFOTECH</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO INFOTECH</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>LU0109929157</td>\n",
" <td>2019-03-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4073314</th>\n",
" <td>4073314</td>\n",
" <td>3652177</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT LATITUDE</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT LATITUDE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2018-05-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4073315</th>\n",
" <td>4073315</td>\n",
" <td>3652178</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT LATITUDE</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT LATITUDE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2018-06-30</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4073316</th>\n",
" <td>4073316</td>\n",
" <td>3652179</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT LATITUDE</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT LATITUDE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2018-12-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4073317</th>\n",
" <td>4073317</td>\n",
" <td>3652180</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT LATITUDE</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT LATITUDE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2019-08-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4073318</th>\n",
" <td>4073318</td>\n",
" <td>3652181</td>\n",
" <td>L134</td>\n",
" <td>15292.0</td>\n",
" <td>15292.0</td>\n",
" <td>11215</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT LATITUDE</td>\n",
" <td>...</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT LATITUDE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010147603</td>\n",
" <td>2020-01-31</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>640 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0.1 Unnamed: 0 Agreement - Code Company - Id \\\n",
"2995412 2995412 2716081 L134 15292.0 \n",
"2995413 2995413 2716082 L134 15292.0 \n",
"2995423 2995423 2716092 L134 15292.0 \n",
"2995424 2995424 2716093 L134 15292.0 \n",
"2995425 2995425 2716094 L134 15292.0 \n",
"... ... ... ... ... \n",
"4073314 4073314 3652177 L134 15292.0 \n",
"4073315 4073315 3652178 L134 15292.0 \n",
"4073316 4073316 3652179 L134 15292.0 \n",
"4073317 4073317 3652180 L134 15292.0 \n",
"4073318 4073318 3652181 L134 15292.0 \n",
"\n",
" Company - Ultimate Parent Id Registrar Account - ID \\\n",
"2995412 15292.0 11215 \n",
"2995413 15292.0 11215 \n",
"2995423 15292.0 11215 \n",
"2995424 15292.0 11215 \n",
"2995425 15292.0 11215 \n",
"... ... ... \n",
"4073314 15292.0 11215 \n",
"4073315 15292.0 11215 \n",
"4073316 15292.0 11215 \n",
"4073317 15292.0 11215 \n",
"4073318 15292.0 11215 \n",
"\n",
" Registrar Account - Region RegistrarAccount - Country \\\n",
"2995412 LUXEMBOURG LUXEMBOURG \n",
"2995413 LUXEMBOURG LUXEMBOURG \n",
"2995423 LUXEMBOURG LUXEMBOURG \n",
"2995424 LUXEMBOURG LUXEMBOURG \n",
"2995425 LUXEMBOURG LUXEMBOURG \n",
"... ... ... \n",
"4073314 LUXEMBOURG LUXEMBOURG \n",
"4073315 LUXEMBOURG LUXEMBOURG \n",
"4073316 LUXEMBOURG LUXEMBOURG \n",
"4073317 LUXEMBOURG LUXEMBOURG \n",
"4073318 LUXEMBOURG LUXEMBOURG \n",
"\n",
" Product - Asset Type Product - Strategy ... \\\n",
"2995412 NAN INFOTECH ... \n",
"2995413 NAN INFOTECH ... \n",
"2995423 NAN INFOTECH ... \n",
"2995424 NAN INFOTECH ... \n",
"2995425 NAN INFOTECH ... \n",
"... ... ... ... \n",
"4073314 EQUITY INVESTISSEMENT LATITUDE ... \n",
"4073315 EQUITY INVESTISSEMENT LATITUDE ... \n",
"4073316 EQUITY INVESTISSEMENT LATITUDE ... \n",
"4073317 EQUITY INVESTISSEMENT LATITUDE ... \n",
"4073318 EQUITY INVESTISSEMENT LATITUDE ... \n",
"\n",
" Product - Is Dedie ? Product - Fund \\\n",
"2995412 NO CARMIGNAC PORTFOLIO INFOTECH \n",
"2995413 NO CARMIGNAC PORTFOLIO INFOTECH \n",
"2995423 NO CARMIGNAC PORTFOLIO INFOTECH \n",
"2995424 NO CARMIGNAC PORTFOLIO INFOTECH \n",
"2995425 NO CARMIGNAC PORTFOLIO INFOTECH \n",
"... ... ... \n",
"4073314 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
"4073315 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
"4073316 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
"4073317 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
"4073318 NO CARMIGNAC INVESTISSEMENT LATITUDE \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency \\\n",
"2995412 A EUR \n",
"2995413 A EUR \n",
"2995423 A EUR \n",
"2995424 A EUR \n",
"2995425 A EUR \n",
"... ... ... \n",
"4073314 A EUR \n",
"4073315 A EUR \n",
"4073316 A EUR \n",
"4073317 A EUR \n",
"4073318 A EUR \n",
"\n",
" Product - Isin Centralisation Date Quantity - AUM Value - AUM CCY \\\n",
"2995412 LU0109929157 2015-06-30 0.0 NaN \n",
"2995413 LU0109929157 2016-03-31 0.0 NaN \n",
"2995423 LU0109929157 2017-05-31 0.0 NaN \n",
"2995424 LU0109929157 2019-02-28 0.0 NaN \n",
"2995425 LU0109929157 2019-03-31 0.0 NaN \n",
"... ... ... ... ... \n",
"4073314 FR0010147603 2018-05-31 0.0 NaN \n",
"4073315 FR0010147603 2018-06-30 0.0 NaN \n",
"4073316 FR0010147603 2018-12-31 0.0 NaN \n",
"4073317 FR0010147603 2019-08-31 0.0 NaN \n",
"4073318 FR0010147603 2020-01-31 0.0 NaN \n",
"\n",
" Value - AUM € repair_flag \n",
"2995412 NaN False \n",
"2995413 NaN False \n",
"2995423 NaN False \n",
"2995424 NaN False \n",
"2995425 NaN False \n",
"... ... ... \n",
"4073314 NaN False \n",
"4073315 NaN False \n",
"4073316 NaN False \n",
"4073317 NaN False \n",
"4073318 NaN False \n",
"\n",
"[640 rows x 21 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_repaired[df_repaired[\"Registrar Account - ID\"] == 11215]"
]
},
{
"cell_type": "markdown",
"id": "74ab7fb4",
"metadata": {},
"source": [
"### Flows"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3347dc39",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_159596/3878087020.py:2: DtypeWarning: Columns (1,2,3,4) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" flows = pd.read_csv(f, sep =\",\")\n"
]
}
],
"source": [
"with fs.open('s3://projet-bdc-carmignac-g3/flows.csv', 'rb') as f:\n",
" flows = pd.read_csv(f, sep =\",\")\n",
"\n",
"sample_flows = sample_by_blocks(flows, block_size=10, num_blocks=10, random_state=42)"
]
},
{
"cell_type": "markdown",
"id": "4bb4f9c7",
"metadata": {},
"source": [
"## Clustering"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5957c389",
"metadata": {},
"outputs": [],
"source": [
"def load_and_clean_data(flows_path, aum_path, rates_path, gov_path):\n",
" \"\"\"\n",
" Loads raw CSVs and parses dates for consistent time-series analysis.\n",
" \"\"\"\n",
"\n",
" flows = pd.read_csv(flows_path)\n",
" flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])\n",
" \n",
" aum = pd.read_csv(aum_path)\n",
" aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])\n",
" \n",
" rates = pd.read_csv(rates_path)\n",
" try:\n",
" rates['Date'] = pd.to_datetime(rates['Date'], dayfirst=True)\n",
" except:\n",
" rates['Date'] = pd.to_datetime(rates['Date'])\n",
" \n",
" gov = pd.read_csv(gov_path)\n",
" gov['Date'] = pd.to_datetime(gov['Date'])\n",
" \n",
" return flows, aum, rates, gov"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "479a50b8",
"metadata": {},
"outputs": [],
"source": [
"flows_path = \"flows_sample.csv\"\n",
"aum_path = \"aum_sample.csv\"\n",
"rates_path = \"str_rates.csv\"\n",
"gov_path = \"eur_gov_indices.csv\"\n",
"\n",
"flows, aum, rates, gov = load_and_clean_data(flows_path, aum_path, rates_path, gov_path)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "a6228231",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Bond/Index</th>\n",
" <th>Description</th>\n",
" <th>Date</th>\n",
" <th>Total Return % 1-wk-LOC</th>\n",
" <th>Yield to Maturity (s.a.)</th>\n",
" <th>Yield to Maturity (conv.)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-01-07</td>\n",
" <td>0.484</td>\n",
" <td>3.06</td>\n",
" <td>3.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-01-14</td>\n",
" <td>0.414</td>\n",
" <td>3.00</td>\n",
" <td>3.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-01-21</td>\n",
" <td>0.006</td>\n",
" <td>3.02</td>\n",
" <td>3.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-01-28</td>\n",
" <td>0.208</td>\n",
" <td>3.00</td>\n",
" <td>3.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-02-04</td>\n",
" <td>0.435</td>\n",
" <td>3.01</td>\n",
" <td>3.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-02-11</td>\n",
" <td>0.221</td>\n",
" <td>2.98</td>\n",
" <td>3.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-02-18</td>\n",
" <td>-1.020</td>\n",
" <td>3.13</td>\n",
" <td>3.16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-02-25</td>\n",
" <td>-0.198</td>\n",
" <td>3.16</td>\n",
" <td>3.19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-03-04</td>\n",
" <td>0.228</td>\n",
" <td>3.15</td>\n",
" <td>3.18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-03-11</td>\n",
" <td>-0.380</td>\n",
" <td>3.20</td>\n",
" <td>3.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-03-18</td>\n",
" <td>0.343</td>\n",
" <td>3.19</td>\n",
" <td>3.21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-03-25</td>\n",
" <td>0.528</td>\n",
" <td>3.15</td>\n",
" <td>3.17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-04-01</td>\n",
" <td>0.713</td>\n",
" <td>3.11</td>\n",
" <td>3.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-04-08</td>\n",
" <td>0.014</td>\n",
" <td>3.09</td>\n",
" <td>3.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-04-15</td>\n",
" <td>0.515</td>\n",
" <td>3.02</td>\n",
" <td>3.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-04-22</td>\n",
" <td>0.322</td>\n",
" <td>2.98</td>\n",
" <td>3.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-04-29</td>\n",
" <td>0.444</td>\n",
" <td>2.91</td>\n",
" <td>2.94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-05-06</td>\n",
" <td>-0.182</td>\n",
" <td>2.95</td>\n",
" <td>2.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-05-13</td>\n",
" <td>0.808</td>\n",
" <td>2.85</td>\n",
" <td>2.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>G0D0</td>\n",
" <td>ICE BofA German Government Index</td>\n",
" <td>2005-05-20</td>\n",
" <td>-0.090</td>\n",
" <td>2.89</td>\n",
" <td>2.91</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Bond/Index Description Date \\\n",
"0 G0D0 ICE BofA German Government Index 2005-01-07 \n",
"1 G0D0 ICE BofA German Government Index 2005-01-14 \n",
"2 G0D0 ICE BofA German Government Index 2005-01-21 \n",
"3 G0D0 ICE BofA German Government Index 2005-01-28 \n",
"4 G0D0 ICE BofA German Government Index 2005-02-04 \n",
"5 G0D0 ICE BofA German Government Index 2005-02-11 \n",
"6 G0D0 ICE BofA German Government Index 2005-02-18 \n",
"7 G0D0 ICE BofA German Government Index 2005-02-25 \n",
"8 G0D0 ICE BofA German Government Index 2005-03-04 \n",
"9 G0D0 ICE BofA German Government Index 2005-03-11 \n",
"10 G0D0 ICE BofA German Government Index 2005-03-18 \n",
"11 G0D0 ICE BofA German Government Index 2005-03-25 \n",
"12 G0D0 ICE BofA German Government Index 2005-04-01 \n",
"13 G0D0 ICE BofA German Government Index 2005-04-08 \n",
"14 G0D0 ICE BofA German Government Index 2005-04-15 \n",
"15 G0D0 ICE BofA German Government Index 2005-04-22 \n",
"16 G0D0 ICE BofA German Government Index 2005-04-29 \n",
"17 G0D0 ICE BofA German Government Index 2005-05-06 \n",
"18 G0D0 ICE BofA German Government Index 2005-05-13 \n",
"19 G0D0 ICE BofA German Government Index 2005-05-20 \n",
"\n",
" Total Return % 1-wk-LOC Yield to Maturity (s.a.) \\\n",
"0 0.484 3.06 \n",
"1 0.414 3.00 \n",
"2 0.006 3.02 \n",
"3 0.208 3.00 \n",
"4 0.435 3.01 \n",
"5 0.221 2.98 \n",
"6 -1.020 3.13 \n",
"7 -0.198 3.16 \n",
"8 0.228 3.15 \n",
"9 -0.380 3.20 \n",
"10 0.343 3.19 \n",
"11 0.528 3.15 \n",
"12 0.713 3.11 \n",
"13 0.014 3.09 \n",
"14 0.515 3.02 \n",
"15 0.322 2.98 \n",
"16 0.444 2.91 \n",
"17 -0.182 2.95 \n",
"18 0.808 2.85 \n",
"19 -0.090 2.89 \n",
"\n",
" Yield to Maturity (conv.) \n",
"0 3.08 \n",
"1 3.03 \n",
"2 3.04 \n",
"3 3.03 \n",
"4 3.03 \n",
"5 3.00 \n",
"6 3.16 \n",
"7 3.19 \n",
"8 3.18 \n",
"9 3.23 \n",
"10 3.21 \n",
"11 3.17 \n",
"12 3.13 \n",
"13 3.12 \n",
"14 3.04 \n",
"15 3.00 \n",
"16 2.94 \n",
"17 2.97 \n",
"18 2.88 \n",
"19 2.91 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gov.head(20)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}