Project_Carmignac/data_1.ipynb

3659 lines
339 KiB
Plaintext
Raw Permalink Normal View History

2026-02-01 23:12:11 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "e637deae-9168-4fb2-b95f-4e42d8d72d9e",
"metadata": {},
"source": [
"# DATA COLLECTION "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9f99615b-5a9d-434a-baa0-dca55edf7699",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>003</th>\n",
" <th>166</th>\n",
" <th>166.1</th>\n",
" <th>200000647</th>\n",
" <th>France</th>\n",
" <th>France.1</th>\n",
" <th>Diversified</th>\n",
" <th>Patrimoine</th>\n",
" <th>FCP</th>\n",
" <th>1989-11-07</th>\n",
" <th>...</th>\n",
" <th>Carmignac Patrimoine</th>\n",
" <th>A</th>\n",
" <th>EUR</th>\n",
" <th>FR0010135103</th>\n",
" <th>1989-11-07.1</th>\n",
" <th>NULL.1</th>\n",
" <th>2015-02-28</th>\n",
" <th>35.368000000</th>\n",
" <th>23920.7931</th>\n",
" <th>23920.7931.1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>1989-11-07</td>\n",
" <td>...</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>1989-11-07</td>\n",
" <td>NaN</td>\n",
" <td>2016-09-30</td>\n",
" <td>35.368</td>\n",
" <td>22501.1216</td>\n",
" <td>22501.1216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>1989-11-07</td>\n",
" <td>...</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>1989-11-07</td>\n",
" <td>NaN</td>\n",
" <td>2018-01-31</td>\n",
" <td>35.368</td>\n",
" <td>23602.1274</td>\n",
" <td>23602.1274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>1989-11-07</td>\n",
" <td>...</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>1989-11-07</td>\n",
" <td>NaN</td>\n",
" <td>2019-11-30</td>\n",
" <td>0.000</td>\n",
" <td>0.0000</td>\n",
" <td>0.0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>1989-01-26</td>\n",
" <td>NaN</td>\n",
" <td>2015-02-28</td>\n",
" <td>193.970</td>\n",
" <td>241339.4137</td>\n",
" <td>241339.4137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>1989-01-26</td>\n",
" <td>NaN</td>\n",
" <td>2017-02-28</td>\n",
" <td>193.970</td>\n",
" <td>230036.7818</td>\n",
" <td>230036.7818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4842120</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>2013-11-25</td>\n",
" <td>...</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2015-11-19</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-31</td>\n",
" <td>2835.000</td>\n",
" <td>296115.7500</td>\n",
" <td>296115.7500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4842121</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>2013-11-25</td>\n",
" <td>...</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2015-11-19</td>\n",
" <td>NaN</td>\n",
" <td>2021-06-30</td>\n",
" <td>2835.000</td>\n",
" <td>296711.1000</td>\n",
" <td>296711.1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4842122</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>2013-11-25</td>\n",
" <td>...</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2015-11-19</td>\n",
" <td>NaN</td>\n",
" <td>2021-08-31</td>\n",
" <td>2835.000</td>\n",
" <td>297079.6500</td>\n",
" <td>297079.6500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4842123</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>2013-11-25</td>\n",
" <td>...</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>FW &amp; FW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1792391911</td>\n",
" <td>2018-03-16</td>\n",
" <td>NaN</td>\n",
" <td>2021-01-31</td>\n",
" <td>3524.100</td>\n",
" <td>356427.4740</td>\n",
" <td>356427.4740</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4842124</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>2013-11-25</td>\n",
" <td>...</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>FW &amp; FW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1792391911</td>\n",
" <td>2018-03-16</td>\n",
" <td>NaN</td>\n",
" <td>2021-04-30</td>\n",
" <td>4415.840</td>\n",
" <td>448693.5024</td>\n",
" <td>448693.5024</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4842125 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" 003 166 166.1 200000647 \\\n",
"0 003 166 166 200000647 \n",
"1 003 166 166 200000647 \n",
"2 003 166 166 200000647 \n",
"3 003 166 166 200000647 \n",
"4 003 166 166 200000647 \n",
"... ... ... ... ... \n",
"4842120 Private Client Private Client Private Client Private Client \n",
"4842121 Private Client Private Client Private Client Private Client \n",
"4842122 Private Client Private Client Private Client Private Client \n",
"4842123 Private Client Private Client Private Client Private Client \n",
"4842124 Private Client Private Client Private Client Private Client \n",
"\n",
" France France.1 Diversified Patrimoine FCP \\\n",
"0 France France Diversified Patrimoine FCP \n",
"1 France France Diversified Patrimoine FCP \n",
"2 France France Diversified Patrimoine FCP \n",
"3 France France Equity Investissement FCP \n",
"4 France France Equity Investissement FCP \n",
"... ... ... ... ... ... \n",
"4842120 Switzerland Switzerland Fixed Income Sécurité SICAV \n",
"4842121 Switzerland Switzerland Fixed Income Sécurité SICAV \n",
"4842122 Switzerland Switzerland Fixed Income Sécurité SICAV \n",
"4842123 Switzerland Switzerland Fixed Income Sécurité SICAV \n",
"4842124 Switzerland Switzerland Fixed Income Sécurité SICAV \n",
"\n",
" 1989-11-07 ... Carmignac Patrimoine A EUR \\\n",
"0 1989-11-07 ... Carmignac Patrimoine A EUR \n",
"1 1989-11-07 ... Carmignac Patrimoine A EUR \n",
"2 1989-11-07 ... Carmignac Patrimoine A EUR \n",
"3 1989-01-26 ... Carmignac Investissement A EUR \n",
"4 1989-01-26 ... Carmignac Investissement A EUR \n",
"... ... ... ... ... ... \n",
"4842120 2013-11-25 ... Carmignac Portfolio Sécurité AW & AW-R EUR \n",
"4842121 2013-11-25 ... Carmignac Portfolio Sécurité AW & AW-R EUR \n",
"4842122 2013-11-25 ... Carmignac Portfolio Sécurité AW & AW-R EUR \n",
"4842123 2013-11-25 ... Carmignac Portfolio Sécurité FW & FW-R EUR \n",
"4842124 2013-11-25 ... Carmignac Portfolio Sécurité FW & FW-R EUR \n",
"\n",
" FR0010135103 1989-11-07.1 NULL.1 2015-02-28 35.368000000 \\\n",
"0 FR0010135103 1989-11-07 NaN 2016-09-30 35.368 \n",
"1 FR0010135103 1989-11-07 NaN 2018-01-31 35.368 \n",
"2 FR0010135103 1989-11-07 NaN 2019-11-30 0.000 \n",
"3 FR0010148981 1989-01-26 NaN 2015-02-28 193.970 \n",
"4 FR0010148981 1989-01-26 NaN 2017-02-28 193.970 \n",
"... ... ... ... ... ... \n",
"4842120 LU1299306321 2015-11-19 NaN 2021-03-31 2835.000 \n",
"4842121 LU1299306321 2015-11-19 NaN 2021-06-30 2835.000 \n",
"4842122 LU1299306321 2015-11-19 NaN 2021-08-31 2835.000 \n",
"4842123 LU1792391911 2018-03-16 NaN 2021-01-31 3524.100 \n",
"4842124 LU1792391911 2018-03-16 NaN 2021-04-30 4415.840 \n",
"\n",
" 23920.7931 23920.7931.1 \n",
"0 22501.1216 22501.1216 \n",
"1 23602.1274 23602.1274 \n",
"2 0.0000 0.0000 \n",
"3 241339.4137 241339.4137 \n",
"4 230036.7818 230036.7818 \n",
"... ... ... \n",
"4842120 296115.7500 296115.7500 \n",
"4842121 296711.1000 296711.1000 \n",
"4842122 297079.6500 297079.6500 \n",
"4842123 356427.4740 356427.4740 \n",
"4842124 448693.5024 448693.5024 \n",
"\n",
"[4842125 rows x 21 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"chemin_fichier = \"s3://projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv\"\n",
"df_aum1 = pd.read_csv(chemin_fichier, sep=';', engine='python')\n",
"df_aum1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "036d6acb-cc41-4dd6-9456-3aaf6fedbda4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>003</th>\n",
" <th>166</th>\n",
" <th>166.1</th>\n",
" <th>406533</th>\n",
" <th>France</th>\n",
" <th>France.1</th>\n",
" <th>Equity</th>\n",
" <th>Investissement</th>\n",
" <th>FCP</th>\n",
" <th>1989-01-26</th>\n",
" <th>...</th>\n",
" <th>2016-04-04</th>\n",
" <th>0.000000000</th>\n",
" <th>-25.580000000</th>\n",
" <th>-25.580000000.1</th>\n",
" <th>0.0000</th>\n",
" <th>-27030.1300</th>\n",
" <th>-27030.1300.1</th>\n",
" <th>0.0000.1</th>\n",
" <th>-27030.1300.2</th>\n",
" <th>-27030.1300.3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2016-07-12</td>\n",
" <td>0.0</td>\n",
" <td>-8.87500</td>\n",
" <td>-8.87500</td>\n",
" <td>0.00</td>\n",
" <td>-10134.72</td>\n",
" <td>-10134.72</td>\n",
" <td>0.00</td>\n",
" <td>-10134.72</td>\n",
" <td>-10134.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2016-11-22</td>\n",
" <td>0.0</td>\n",
" <td>-177.06500</td>\n",
" <td>-177.06500</td>\n",
" <td>0.00</td>\n",
" <td>-200504.86</td>\n",
" <td>-200504.86</td>\n",
" <td>0.00</td>\n",
" <td>-200504.86</td>\n",
" <td>-200504.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2017-12-12</td>\n",
" <td>0.0</td>\n",
" <td>-72.63700</td>\n",
" <td>-72.63700</td>\n",
" <td>0.00</td>\n",
" <td>-86605.10</td>\n",
" <td>-86605.10</td>\n",
" <td>0.00</td>\n",
" <td>-86605.10</td>\n",
" <td>-86605.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>006</td>\n",
" <td>968</td>\n",
" <td>968</td>\n",
" <td>412182</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2018-09-18</td>\n",
" <td>0.0</td>\n",
" <td>-265.02125</td>\n",
" <td>-265.02125</td>\n",
" <td>0.00</td>\n",
" <td>-315155.32</td>\n",
" <td>-315155.32</td>\n",
" <td>0.00</td>\n",
" <td>-315155.32</td>\n",
" <td>-315155.32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>008</td>\n",
" <td>927</td>\n",
" <td>927</td>\n",
" <td>411986</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Multi Expertise</td>\n",
" <td>FCP</td>\n",
" <td>2002-01-02</td>\n",
" <td>...</td>\n",
" <td>2022-07-06</td>\n",
" <td>0.0</td>\n",
" <td>-0.29700</td>\n",
" <td>-0.29700</td>\n",
" <td>0.00</td>\n",
" <td>-1090.03</td>\n",
" <td>-1090.03</td>\n",
" <td>0.00</td>\n",
" <td>-1090.03</td>\n",
" <td>-1090.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2566305</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Global Bond</td>\n",
" <td>SICAV</td>\n",
" <td>2007-12-14</td>\n",
" <td>...</td>\n",
" <td>2015-04-07</td>\n",
" <td>0.0</td>\n",
" <td>-1.00000</td>\n",
" <td>-1.00000</td>\n",
" <td>0.00</td>\n",
" <td>-1382.36</td>\n",
" <td>-1382.36</td>\n",
" <td>0.00</td>\n",
" <td>-1382.36</td>\n",
" <td>-1382.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2566306</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Global Bond</td>\n",
" <td>SICAV</td>\n",
" <td>2007-12-14</td>\n",
" <td>...</td>\n",
" <td>2015-04-10</td>\n",
" <td>0.0</td>\n",
" <td>-31.00000</td>\n",
" <td>-31.00000</td>\n",
" <td>0.00</td>\n",
" <td>-43323.43</td>\n",
" <td>-43323.43</td>\n",
" <td>0.00</td>\n",
" <td>-43323.43</td>\n",
" <td>-43323.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2566307</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2015-10-12</td>\n",
" <td>97.0</td>\n",
" <td>0.00000</td>\n",
" <td>97.00000</td>\n",
" <td>166077.58</td>\n",
" <td>0.00</td>\n",
" <td>166077.58</td>\n",
" <td>166077.58</td>\n",
" <td>0.00</td>\n",
" <td>166077.58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2566308</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2015-11-10</td>\n",
" <td>40.0</td>\n",
" <td>-22.00000</td>\n",
" <td>18.00000</td>\n",
" <td>68640.40</td>\n",
" <td>-37752.22</td>\n",
" <td>30888.18</td>\n",
" <td>68640.40</td>\n",
" <td>-37752.22</td>\n",
" <td>30888.18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2566309</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>1989-01-26</td>\n",
" <td>...</td>\n",
" <td>2015-12-17</td>\n",
" <td>26.0</td>\n",
" <td>0.00000</td>\n",
" <td>26.00000</td>\n",
" <td>44621.20</td>\n",
" <td>0.00</td>\n",
" <td>44621.20</td>\n",
" <td>44621.20</td>\n",
" <td>0.00</td>\n",
" <td>44621.20</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2566310 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" 003 166 166.1 406533 \\\n",
"0 003 166 166 406533 \n",
"1 003 166 166 406533 \n",
"2 003 166 166 406533 \n",
"3 006 968 968 412182 \n",
"4 008 927 927 411986 \n",
"... ... ... ... ... \n",
"2566305 Private Client Private Client Private Client Private Client \n",
"2566306 Private Client Private Client Private Client Private Client \n",
"2566307 Private Client Private Client Private Client Private Client \n",
"2566308 Private Client Private Client Private Client Private Client \n",
"2566309 Private Client Private Client Private Client Private Client \n",
"\n",
" France France.1 Equity Investissement FCP \\\n",
"0 France France Equity Investissement FCP \n",
"1 France France Equity Investissement FCP \n",
"2 France France Equity Investissement FCP \n",
"3 France France Equity Investissement FCP \n",
"4 France France Diversified Multi Expertise FCP \n",
"... ... ... ... ... ... \n",
"2566305 Luxembourg Luxembourg Fixed Income Global Bond SICAV \n",
"2566306 Luxembourg Luxembourg Fixed Income Global Bond SICAV \n",
"2566307 Luxembourg Luxembourg Fixed Income Sécurité FCP \n",
"2566308 Luxembourg Luxembourg Fixed Income Sécurité FCP \n",
"2566309 Luxembourg Luxembourg Fixed Income Sécurité FCP \n",
"\n",
" 1989-01-26 ... 2016-04-04 0.000000000 -25.580000000 \\\n",
"0 1989-01-26 ... 2016-07-12 0.0 -8.87500 \n",
"1 1989-01-26 ... 2016-11-22 0.0 -177.06500 \n",
"2 1989-01-26 ... 2017-12-12 0.0 -72.63700 \n",
"3 1989-01-26 ... 2018-09-18 0.0 -265.02125 \n",
"4 2002-01-02 ... 2022-07-06 0.0 -0.29700 \n",
"... ... ... ... ... ... \n",
"2566305 2007-12-14 ... 2015-04-07 0.0 -1.00000 \n",
"2566306 2007-12-14 ... 2015-04-10 0.0 -31.00000 \n",
"2566307 1989-01-26 ... 2015-10-12 97.0 0.00000 \n",
"2566308 1989-01-26 ... 2015-11-10 40.0 -22.00000 \n",
"2566309 1989-01-26 ... 2015-12-17 26.0 0.00000 \n",
"\n",
" -25.580000000.1 0.0000 -27030.1300 -27030.1300.1 0.0000.1 \\\n",
"0 -8.87500 0.00 -10134.72 -10134.72 0.00 \n",
"1 -177.06500 0.00 -200504.86 -200504.86 0.00 \n",
"2 -72.63700 0.00 -86605.10 -86605.10 0.00 \n",
"3 -265.02125 0.00 -315155.32 -315155.32 0.00 \n",
"4 -0.29700 0.00 -1090.03 -1090.03 0.00 \n",
"... ... ... ... ... ... \n",
"2566305 -1.00000 0.00 -1382.36 -1382.36 0.00 \n",
"2566306 -31.00000 0.00 -43323.43 -43323.43 0.00 \n",
"2566307 97.00000 166077.58 0.00 166077.58 166077.58 \n",
"2566308 18.00000 68640.40 -37752.22 30888.18 68640.40 \n",
"2566309 26.00000 44621.20 0.00 44621.20 44621.20 \n",
"\n",
" -27030.1300.2 -27030.1300.3 \n",
"0 -10134.72 -10134.72 \n",
"1 -200504.86 -200504.86 \n",
"2 -86605.10 -86605.10 \n",
"3 -315155.32 -315155.32 \n",
"4 -1090.03 -1090.03 \n",
"... ... ... \n",
"2566305 -1382.36 -1382.36 \n",
"2566306 -43323.43 -43323.43 \n",
"2566307 0.00 166077.58 \n",
"2566308 -37752.22 30888.18 \n",
"2566309 0.00 44621.20 \n",
"\n",
"[2566310 rows x 27 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chemin_fichier = \"s3://projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv\"\n",
"df_flows1 = pd.read_csv(chemin_fichier, sep=';', engine='python')\n",
"df_flows1"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f8508d94-74a7-4bb0-8b81-c2e06850c25f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>2.464867e+04</td>\n",
" <td>2.464867e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>2.241306e+04</td>\n",
" <td>2.241306e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>2.205124e+04</td>\n",
" <td>2.205124e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>2.162612e+04</td>\n",
" <td>2.162612e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>2.248945e+04</td>\n",
" <td>2.248945e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4880292</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2020-02-29</td>\n",
" <td>26801.000</td>\n",
" <td>2.740670e+06</td>\n",
" <td>2.740670e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4880293</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2020-06-30</td>\n",
" <td>3099.000</td>\n",
" <td>3.122862e+05</td>\n",
" <td>3.122862e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4880294</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2020-10-31</td>\n",
" <td>3099.000</td>\n",
" <td>3.184222e+05</td>\n",
" <td>3.184222e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4880295</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2021-07-31</td>\n",
" <td>2835.000</td>\n",
" <td>2.976183e+05</td>\n",
" <td>2.976183e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4880296</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Sécurité</td>\n",
" <td>FW &amp; FW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1792391911</td>\n",
" <td>2020-07-31</td>\n",
" <td>2916.394</td>\n",
" <td>2.874106e+05</td>\n",
" <td>2.874106e+05</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4880297 rows × 18 columns</p>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"... ... ... ... \n",
"4880292 Private Client Private Client Private Client \n",
"4880293 Private Client Private Client Private Client \n",
"4880294 Private Client Private Client Private Client \n",
"4880295 Private Client Private Client Private Client \n",
"4880296 Private Client Private Client Private Client \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"... ... ... \n",
"4880292 Private Client Switzerland \n",
"4880293 Private Client Switzerland \n",
"4880294 Private Client Switzerland \n",
"4880295 Private Client Switzerland \n",
"4880296 Private Client Switzerland \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"... ... ... ... \n",
"4880292 Switzerland Fixed Income Sécurité \n",
"4880293 Switzerland Fixed Income Sécurité \n",
"4880294 Switzerland Fixed Income Sécurité \n",
"4880295 Switzerland Fixed Income Sécurité \n",
"4880296 Switzerland Fixed Income Sécurité \n",
"\n",
" Product - Legal Status Product - Is Dedie ? \\\n",
"0 FCP NO \n",
"1 FCP NO \n",
"2 FCP NO \n",
"3 FCP NO \n",
"4 FCP NO \n",
"... ... ... \n",
"4880292 SICAV NO \n",
"4880293 SICAV NO \n",
"4880294 SICAV NO \n",
"4880295 SICAV NO \n",
"4880296 SICAV NO \n",
"\n",
" Product - Fund Product - Shareclass Type \\\n",
"0 Carmignac Patrimoine A \n",
"1 Carmignac Patrimoine A \n",
"2 Carmignac Patrimoine A \n",
"3 Carmignac Patrimoine A \n",
"4 Carmignac Patrimoine A \n",
"... ... ... \n",
"4880292 Carmignac Portfolio Sécurité AW & AW-R \n",
"4880293 Carmignac Portfolio Sécurité AW & AW-R \n",
"4880294 Carmignac Portfolio Sécurité AW & AW-R \n",
"4880295 Carmignac Portfolio Sécurité AW & AW-R \n",
"4880296 Carmignac Portfolio Sécurité FW & FW-R \n",
"\n",
" Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
"0 EUR FR0010135103 2015-03-31 \n",
"1 EUR FR0010135103 2015-11-30 \n",
"2 EUR FR0010135103 2015-12-31 \n",
"3 EUR FR0010135103 2016-03-31 \n",
"4 EUR FR0010135103 2016-11-30 \n",
"... ... ... ... \n",
"4880292 EUR LU1299306321 2020-02-29 \n",
"4880293 EUR LU1299306321 2020-06-30 \n",
"4880294 EUR LU1299306321 2020-10-31 \n",
"4880295 EUR LU1299306321 2021-07-31 \n",
"4880296 EUR LU1792391911 2020-07-31 \n",
"\n",
" Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 35.368 2.464867e+04 2.464867e+04 \n",
"1 35.368 2.241306e+04 2.241306e+04 \n",
"2 35.368 2.205124e+04 2.205124e+04 \n",
"3 35.368 2.162612e+04 2.162612e+04 \n",
"4 35.368 2.248945e+04 2.248945e+04 \n",
"... ... ... ... \n",
"4880292 26801.000 2.740670e+06 2.740670e+06 \n",
"4880293 3099.000 3.122862e+05 3.122862e+05 \n",
"4880294 3099.000 3.184222e+05 3.184222e+05 \n",
"4880295 2835.000 2.976183e+05 2.976183e+05 \n",
"4880296 2916.394 2.874106e+05 2.874106e+05 \n",
"\n",
"[4880297 rows x 18 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd \n",
"chemin_fichier = \"s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv\"\n",
"df_aum2 = pd.read_csv(chemin_fichier, sep=';', engine='python')\n",
"df_aum2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4644da13-5aea-4ca0-9fcf-947324766292",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>...</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2020-11-05</td>\n",
" <td>1636.000</td>\n",
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-03-09</td>\n",
" <td>144.690</td>\n",
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2016-10-26</td>\n",
" <td>0.000</td>\n",
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2018-10-18</td>\n",
" <td>0.000</td>\n",
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2019-04-08</td>\n",
" <td>0.000</td>\n",
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574456</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-06-12</td>\n",
" <td>0.000</td>\n",
" <td>-20.000</td>\n",
" <td>-20.000</td>\n",
" <td>0.00</td>\n",
" <td>-34294.40</td>\n",
" <td>-34294.40</td>\n",
" <td>0.00</td>\n",
" <td>-34294.40</td>\n",
" <td>-34294.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574457</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-09-18</td>\n",
" <td>328.726</td>\n",
" <td>0.000</td>\n",
" <td>328.726</td>\n",
" <td>564028.07</td>\n",
" <td>0.00</td>\n",
" <td>564028.07</td>\n",
" <td>564028.07</td>\n",
" <td>0.00</td>\n",
" <td>564028.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574458</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-09-25</td>\n",
" <td>4.443</td>\n",
" <td>0.000</td>\n",
" <td>4.443</td>\n",
" <td>7603.66</td>\n",
" <td>0.00</td>\n",
" <td>7603.66</td>\n",
" <td>7603.66</td>\n",
" <td>0.00</td>\n",
" <td>7603.66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574459</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-11-09</td>\n",
" <td>0.000</td>\n",
" <td>-440.000</td>\n",
" <td>-440.000</td>\n",
" <td>0.00</td>\n",
" <td>-754696.80</td>\n",
" <td>-754696.80</td>\n",
" <td>0.00</td>\n",
" <td>-754696.80</td>\n",
" <td>-754696.80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574460</th>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Private Client</td>\n",
" <td>Luxembourg</td>\n",
" <td>Luxembourg</td>\n",
" <td>Fixed Income</td>\n",
" <td>Sécurité</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2016-01-11</td>\n",
" <td>3595.000</td>\n",
" <td>0.000</td>\n",
" <td>3595.000</td>\n",
" <td>358385.55</td>\n",
" <td>0.00</td>\n",
" <td>358385.55</td>\n",
" <td>358385.55</td>\n",
" <td>0.00</td>\n",
" <td>358385.55</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2574461 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"... ... ... ... \n",
"2574456 Private Client Private Client Private Client \n",
"2574457 Private Client Private Client Private Client \n",
"2574458 Private Client Private Client Private Client \n",
"2574459 Private Client Private Client Private Client \n",
"2574460 Private Client Private Client Private Client \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"... ... ... \n",
"2574456 Private Client Luxembourg \n",
"2574457 Private Client Luxembourg \n",
"2574458 Private Client Luxembourg \n",
"2574459 Private Client Luxembourg \n",
"2574460 Private Client Luxembourg \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"... ... ... ... \n",
"2574456 Luxembourg Fixed Income Sécurité \n",
"2574457 Luxembourg Fixed Income Sécurité \n",
"2574458 Luxembourg Fixed Income Sécurité \n",
"2574459 Luxembourg Fixed Income Sécurité \n",
"2574460 Luxembourg Fixed Income Sécurité \n",
"\n",
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
"0 SICAV NO ... 2020-11-05 \n",
"1 FCP NO ... 2015-03-09 \n",
"2 FCP NO ... 2016-10-26 \n",
"3 FCP NO ... 2018-10-18 \n",
"4 FCP NO ... 2019-04-08 \n",
"... ... ... ... ... \n",
"2574456 FCP NO ... 2015-06-12 \n",
"2574457 FCP NO ... 2015-09-18 \n",
"2574458 FCP NO ... 2015-09-25 \n",
"2574459 FCP NO ... 2015-11-09 \n",
"2574460 SICAV NO ... 2016-01-11 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.000 0.000 1636.000 \n",
"1 144.690 0.000 144.690 \n",
"2 0.000 -8.321 -8.321 \n",
"3 0.000 -22.083 -22.083 \n",
"4 0.000 -465.992 -465.992 \n",
"... ... ... ... \n",
"2574456 0.000 -20.000 -20.000 \n",
"2574457 328.726 0.000 328.726 \n",
"2574458 4.443 0.000 4.443 \n",
"2574459 0.000 -440.000 -440.000 \n",
"2574460 3595.000 0.000 3595.000 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption \\\n",
"0 280983.00 0.00 \n",
"1 99985.13 0.00 \n",
"2 0.00 -9384.76 \n",
"3 0.00 -25227.40 \n",
"4 0.00 -563775.76 \n",
"... ... ... \n",
"2574456 0.00 -34294.40 \n",
"2574457 564028.07 0.00 \n",
"2574458 7603.66 0.00 \n",
"2574459 0.00 -754696.80 \n",
"2574460 358385.55 0.00 \n",
"\n",
" Value Ccy - NetFlows Value € - Subscription Value € - Redemption \\\n",
"0 280983.00 280983.00 0.00 \n",
"1 99985.13 99985.13 0.00 \n",
"2 -9384.76 0.00 -9384.76 \n",
"3 -25227.40 0.00 -25227.40 \n",
"4 -563775.76 0.00 -563775.76 \n",
"... ... ... ... \n",
"2574456 -34294.40 0.00 -34294.40 \n",
"2574457 564028.07 564028.07 0.00 \n",
"2574458 7603.66 7603.66 0.00 \n",
"2574459 -754696.80 0.00 -754696.80 \n",
"2574460 358385.55 358385.55 0.00 \n",
"\n",
" Value € - NetFlows \n",
"0 280983.00 \n",
"1 99985.13 \n",
"2 -9384.76 \n",
"3 -25227.40 \n",
"4 -563775.76 \n",
"... ... \n",
"2574456 -34294.40 \n",
"2574457 564028.07 \n",
"2574458 7603.66 \n",
"2574459 -754696.80 \n",
"2574460 358385.55 \n",
"\n",
"[2574461 rows x 24 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chemin_fichier = \"s3://projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv\"\n",
"df_flows2 = pd.read_csv(chemin_fichier, sep=';', engine='python')\n",
"df_flows2"
]
},
{
"cell_type": "markdown",
"id": "59d31eaf-c06c-4ebe-9f8c-cb9158a50976",
"metadata": {},
"source": [
"## DATA ANALYSIS"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5773b911-6b84-448d-962f-8228eeac0250",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(4880297, 18)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_aum2.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6f571810-c373-4d30-8ca5-c3a074b95b08",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id',\n",
" 'Registrar Account - ID', 'Registrar Account - Region',\n",
" 'RegistrarAccount - Country', 'Product - Asset Type',\n",
" 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?',\n",
" 'Product - Fund', 'Product - Shareclass Type',\n",
" 'Product - Shareclass Currency', 'Product - Isin',\n",
" 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n",
" 'Value - AUM €'],\n",
" dtype='object')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_aum2.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73c15272-a789-4928-b23e-651dd886efd6",
"metadata": {},
"outputs": [],
"source": [
"'Agreement - Code': Identifie le contrat commercial ou opérationnel entre Carmignac et lintermédiaire distributeur (banque, assurance, plateforme)., \n",
"\n",
"'Company - Id' : Identifiant interne de lentité distributrice (banque privée, CGP, assurance…).Chaque \"Company\" = un partenaire B2B, \n",
"\n",
"'Company - Ultimate Parent Id': Identifiant du groupe parent de la société distributrice.\n",
"\n",
"Ex : une banque privée appartient à un grand groupe bancaire //// OU plusieurs filiales → même Ultimate Parent.\n",
"Sert à agréger les flux ou AUM au niveau du groupe, pas seulement dune filiale.,\n",
"\n",
"'Registrar Account - ID', \n",
"Identifiant du compte investisseur dans les registres.Ce nest PAS le client final : cest souvent un compte omnibus, une plateforme, \n",
"une banque dépositaire, etc.s\n",
"→ IMPORTANT car un même investisseur final peut être caché sous plusieurs comptes.,\n",
"\n",
"'Registrar Account - Region',\n",
"\n",
"'RegistrarAccount - Country', \n",
"\n",
"'Product - Asset Type', \n",
"'Product - Strategy', \n",
"'Product - Legal Status': Le cadre réglementaire du fonds, typiquement : UCITS,AIF,SICAV,FCP, \n",
"'Product - Is Dedie ?': Indique si la part est dédiée à un distributeur particulier (shareclass ségrégée). Valeurs : Oui/Non,\n",
"\n",
"'Product - Fund', \n",
"'Product - Shareclass Type': Chaque share classe peut avoir : Devise différente (EUR, USD) /// Frais différents //Distribution des dividendes \n",
"différente .............,\n",
"\n",
"'Product - Shareclass Currency', \n",
"'Product - Isin': c est un code qui identifie un produit financier, pas un client , #REVOIRR \n",
"'Centralisation Date': Cette date nest pas la date de transaction, mais celle où les positions sont centralisées et publiees pour reporting., ....\n",
"'Quantity - AUM', \n",
"'Value - AUM CCY', Valeur des AUM dans la devise de la share classe,\n",
"'Value - AUM €'\n",
"\n",
"\n",
"AUM = Assets Under Management = Encours sous gestion\n",
"Ce sont les montants totaux investis dans un fonds ou un compte.\n",
"\n",
"Exemple :\n",
"- Quantity : Compte A détient 1 000 parts de Carmignac Patrimoine A EUR\n",
"- Price ou valeurs de la part = 100 EUR\n",
"- AUM Value = 1 000 × 100 = 100 000 EUR\n",
"==> AUM représente la valeur financière totale détenue par le compte pour ce fonds.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b179431d-0d07-4eff-8bb7-976ebbe3f987",
"metadata": {},
"outputs": [],
"source": [
"#FLOWS VS AUM\n",
"Flows = mouvements entrants (subscriptions) ou sortants (redemptions) pendant une période\n",
"AUM = stock à un instant donné\n",
"\n",
"\n",
"\n",
"REMARQUE: CLIENT IDENTIFICATION : le client final nest pas directement identifiable, car intermediaire.\n",
"\n",
"1- Registrar Account ID → identifie le compte dans le registre → peut représenter un ou plusieurs clients finaux.\n",
"2- Company - Id Ultimate Parent → identifie le distributeur, pas le client final.\n",
"\n",
"En pratique pour le clustering client : Il faudra utiliser Registrar Account - ID + Company - Id comme proxy client.\n",
"Mais attention : même investisseur peut avoir plusieurs comptes / plusieurs banques."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "af25fd07-a613-4adc-b88b-93a8d300379c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2574461, 24)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_flows2.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c6d0fe83-2957-430b-89cf-cd30833b7cab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id',\n",
" 'Registrar Account - ID', 'Registrar Account - Region',\n",
" 'RegistrarAccount - Country', 'Product - Asset Type',\n",
" 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?',\n",
" 'Product - Fund', 'Product - Shareclass Type',\n",
" 'Product - Shareclass Currency', 'Product - Isin',\n",
" 'Centralisation Date', 'Quantity - Subscription',\n",
" 'Quantity - Redemption', 'Quantity - NetFlows',\n",
" 'Value Ccy - Subscription', 'Value Ccy - Redemption',\n",
" 'Value Ccy - NetFlows', 'Value € - Subscription',\n",
" 'Value € - Redemption', 'Value € - NetFlows'],\n",
" dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_flows2.columns"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5fac74b0-662f-48d0-a234-7edc3c3e86ad",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Colonne</th>\n",
" <th>Nbr Lignes</th>\n",
" <th>Nb valeurs uniques</th>\n",
" <th>Exemples de valeurs</th>\n",
" <th>Nan Values</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Agreement - Code</td>\n",
" <td>4880297</td>\n",
" <td>2427</td>\n",
" <td>[003, 005, 006, 008, 009, 011, 016, 020, 021, ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Company - Id</td>\n",
" <td>4880297</td>\n",
" <td>1562</td>\n",
" <td>[166, nan, 968, 927, 42, 15181, 13536, 231, 48...</td>\n",
" <td>113750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Company - Ultimate Parent Id</td>\n",
" <td>4880297</td>\n",
" <td>1068</td>\n",
" <td>[166, nan, 968, 927, 42, 877, 13536, 14977, 48...</td>\n",
" <td>113750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Registrar Account - ID</td>\n",
" <td>4880297</td>\n",
" <td>12501</td>\n",
" <td>[200000647, 200000654, 200127202, 404391, 4054...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Registrar Account - Region</td>\n",
" <td>4880297</td>\n",
" <td>15</td>\n",
" <td>[France, Switzerland, Luxembourg, Belgium, Int...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>RegistrarAccount - Country</td>\n",
" <td>4880297</td>\n",
" <td>39</td>\n",
" <td>[France, Switzerland, Luxembourg, Belgium, Mon...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Product - Asset Type</td>\n",
" <td>4880297</td>\n",
" <td>6</td>\n",
" <td>[Diversified, Equity, nan, Alternative, Fixed ...</td>\n",
" <td>315831</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Product - Strategy</td>\n",
" <td>4880297</td>\n",
" <td>53</td>\n",
" <td>[Patrimoine, Investissement, Euro-Investisseme...</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Product - Legal Status</td>\n",
" <td>4880297</td>\n",
" <td>6</td>\n",
" <td>[FCP, SICAV, FCPE, ICAV, OEIC, Others]</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Product - Is Dedie ?</td>\n",
" <td>4880297</td>\n",
" <td>2</td>\n",
" <td>[NO, YES]</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Product - Fund</td>\n",
" <td>4880297</td>\n",
" <td>74</td>\n",
" <td>[Carmignac Patrimoine, Carmignac Investissemen...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Product - Shareclass Type</td>\n",
" <td>4880297</td>\n",
" <td>12</td>\n",
" <td>[A, F, AW &amp; AW-R, FW &amp; FW-R, Not Applicable, E...</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Product - Shareclass Currency</td>\n",
" <td>4880297</td>\n",
" <td>6</td>\n",
" <td>[EUR, CHF, USD, GBP, JPY, CAD]</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Product - Isin</td>\n",
" <td>4880297</td>\n",
" <td>491</td>\n",
" <td>[FR0010135103, FR0010148981, LU0992625839, FR0...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Centralisation Date</td>\n",
" <td>4880297</td>\n",
" <td>130</td>\n",
" <td>[2015-03-31, 2015-11-30, 2015-12-31, 2016-03-3...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Quantity - AUM</td>\n",
" <td>4880297</td>\n",
" <td>554404</td>\n",
" <td>[35.368, 0.0, 193.97, 170.2, 285.2, 277.8, 189...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Value - AUM CCY</td>\n",
" <td>4880297</td>\n",
" <td>1689620</td>\n",
" <td>[24648.6666, 22413.0553, 22051.2406, 21626.117...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Value - AUM €</td>\n",
" <td>4880297</td>\n",
" <td>1697923</td>\n",
" <td>[24648.6666, 22413.0553, 22051.2406, 21626.117...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Colonne Nbr Lignes Nb valeurs uniques \\\n",
"0 Agreement - Code 4880297 2427 \n",
"1 Company - Id 4880297 1562 \n",
"2 Company - Ultimate Parent Id 4880297 1068 \n",
"3 Registrar Account - ID 4880297 12501 \n",
"4 Registrar Account - Region 4880297 15 \n",
"5 RegistrarAccount - Country 4880297 39 \n",
"6 Product - Asset Type 4880297 6 \n",
"7 Product - Strategy 4880297 53 \n",
"8 Product - Legal Status 4880297 6 \n",
"9 Product - Is Dedie ? 4880297 2 \n",
"10 Product - Fund 4880297 74 \n",
"11 Product - Shareclass Type 4880297 12 \n",
"12 Product - Shareclass Currency 4880297 6 \n",
"13 Product - Isin 4880297 491 \n",
"14 Centralisation Date 4880297 130 \n",
"15 Quantity - AUM 4880297 554404 \n",
"16 Value - AUM CCY 4880297 1689620 \n",
"17 Value - AUM € 4880297 1697923 \n",
"\n",
" Exemples de valeurs Nan Values \n",
"0 [003, 005, 006, 008, 009, 011, 016, 020, 021, ... 0 \n",
"1 [166, nan, 968, 927, 42, 15181, 13536, 231, 48... 113750 \n",
"2 [166, nan, 968, 927, 42, 877, 13536, 14977, 48... 113750 \n",
"3 [200000647, 200000654, 200127202, 404391, 4054... 0 \n",
"4 [France, Switzerland, Luxembourg, Belgium, Int... 0 \n",
"5 [France, Switzerland, Luxembourg, Belgium, Mon... 0 \n",
"6 [Diversified, Equity, nan, Alternative, Fixed ... 315831 \n",
"7 [Patrimoine, Investissement, Euro-Investisseme... 75 \n",
"8 [FCP, SICAV, FCPE, ICAV, OEIC, Others] 0 \n",
"9 [NO, YES] 0 \n",
"10 [Carmignac Patrimoine, Carmignac Investissemen... 0 \n",
"11 [A, F, AW & AW-R, FW & FW-R, Not Applicable, E... 35 \n",
"12 [EUR, CHF, USD, GBP, JPY, CAD] 0 \n",
"13 [FR0010135103, FR0010148981, LU0992625839, FR0... 0 \n",
"14 [2015-03-31, 2015-11-30, 2015-12-31, 2016-03-3... 0 \n",
"15 [35.368, 0.0, 193.97, 170.2, 285.2, 277.8, 189... 0 \n",
"16 [24648.6666, 22413.0553, 22051.2406, 21626.117... 0 \n",
"17 [24648.6666, 22413.0553, 22051.2406, 21626.117... 0 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#dict avec valeurs unique de chaque col \n",
"rows = []\n",
"\n",
"for col in df_aum2.columns:\n",
" uniques = df_aum2[col].unique()\n",
" rows.append({\n",
" \"Colonne\": col,\n",
" \"Nbr Lignes\": df_aum2.shape[0], #4.8millions\n",
" \"Nb valeurs uniques\": len(uniques),\n",
" \"Exemples de valeurs\": uniques,\n",
" \"Nan Values\" : df_aum2[col].isna().sum()\n",
" })\n",
"\n",
"df_uniques = pd.DataFrame(rows)\n",
"df_uniques"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5de53ba3-b3db-4935-acac-435b05b909e2",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Colonne</th>\n",
" <th>Nbr Lignes</th>\n",
" <th>Nb valeurs uniques</th>\n",
" <th>Exemples de valeurs</th>\n",
" <th>Nan Values</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Agreement - Code</td>\n",
" <td>2574461</td>\n",
" <td>1611</td>\n",
" <td>[003, 008, 006, 009, 016, 020, 021, 022, 030, ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Company - Id</td>\n",
" <td>2574461</td>\n",
" <td>1231</td>\n",
" <td>[166, 927, 968, 42, 13536, 231, 481, 91, 1038,...</td>\n",
" <td>1540</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Company - Ultimate Parent Id</td>\n",
" <td>2574461</td>\n",
" <td>815</td>\n",
" <td>[166, 927, 968, 42, 13536, 14977, 481, 91, 103...</td>\n",
" <td>1540</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Registrar Account - ID</td>\n",
" <td>2574461</td>\n",
" <td>6842</td>\n",
" <td>[200127202, 406533, 411986, 200000647, 412028,...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Registrar Account - Region</td>\n",
" <td>2574461</td>\n",
" <td>15</td>\n",
" <td>[France, International, Belgium, Spain, Luxemb...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>RegistrarAccount - Country</td>\n",
" <td>2574461</td>\n",
" <td>34</td>\n",
" <td>[France, Monaco, Israel, Belgium, Spain, Luxem...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Product - Asset Type</td>\n",
" <td>2574461</td>\n",
" <td>6</td>\n",
" <td>[Equity, Diversified, Fixed Income, Alternativ...</td>\n",
" <td>2049</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Product - Strategy</td>\n",
" <td>2574461</td>\n",
" <td>50</td>\n",
" <td>[Investissement, Patrimoine, Emerging Patrimoi...</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Product - Legal Status</td>\n",
" <td>2574461</td>\n",
" <td>6</td>\n",
" <td>[SICAV, FCP, FCPE, ICAV, OEIC, Others]</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Product - Is Dedie ?</td>\n",
" <td>2574461</td>\n",
" <td>2</td>\n",
" <td>[NO, YES]</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Product - Fund</td>\n",
" <td>2574461</td>\n",
" <td>70</td>\n",
" <td>[Carmignac Portfolio Investissement, Carmignac...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Product - Shareclass Type</td>\n",
" <td>2574461</td>\n",
" <td>11</td>\n",
" <td>[F, A, AW &amp; AW-R, FW &amp; FW-R, E, IW, Dedicated,...</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Product - Shareclass Currency</td>\n",
" <td>2574461</td>\n",
" <td>6</td>\n",
" <td>[EUR, USD, CHF, GBP, JPY, CAD]</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Product - Isin</td>\n",
" <td>2574461</td>\n",
" <td>474</td>\n",
" <td>[LU0992625839, FR0010135103, FR0010148981, LU0...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Centralisation Date</td>\n",
" <td>2574461</td>\n",
" <td>2780</td>\n",
" <td>[2020-11-05, 2015-03-09, 2016-10-26, 2018-10-1...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Quantity - Subscription</td>\n",
" <td>2574461</td>\n",
" <td>359661</td>\n",
" <td>[1636.0, 144.69, 0.0, 160.698, 17.285, 115.0, ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Quantity - Redemption</td>\n",
" <td>2574461</td>\n",
" <td>374378</td>\n",
" <td>[0.0, -8.321, -22.083, -465.992, -0.397, -36.0...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Quantity - NetFlows</td>\n",
" <td>2574461</td>\n",
" <td>667586</td>\n",
" <td>[1636.0, 144.69, -8.321, -22.083, -465.992, 16...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Value Ccy - Subscription</td>\n",
" <td>2574461</td>\n",
" <td>926633</td>\n",
" <td>[280983.0, 99985.13, 0.0, 21851.77, 19795.99, ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Value Ccy - Redemption</td>\n",
" <td>2574461</td>\n",
" <td>1296468</td>\n",
" <td>[0.0, -9384.76, -25227.4, -563775.76, -1536.53...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Value Ccy - NetFlows</td>\n",
" <td>2574461</td>\n",
" <td>1972319</td>\n",
" <td>[280983.0, 99985.13, -9384.76, -25227.4, -5637...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Value € - Subscription</td>\n",
" <td>2574461</td>\n",
" <td>955890</td>\n",
" <td>[280983.0, 99985.13, 0.0, 21851.77, 19795.99, ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Value € - Redemption</td>\n",
" <td>2574461</td>\n",
" <td>1323531</td>\n",
" <td>[0.0, -9384.76, -25227.4, -563775.76, -1536.53...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Value € - NetFlows</td>\n",
" <td>2574461</td>\n",
" <td>2018916</td>\n",
" <td>[280983.0, 99985.13, -9384.76, -25227.4, -5637...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Colonne Nbr Lignes Nb valeurs uniques \\\n",
"0 Agreement - Code 2574461 1611 \n",
"1 Company - Id 2574461 1231 \n",
"2 Company - Ultimate Parent Id 2574461 815 \n",
"3 Registrar Account - ID 2574461 6842 \n",
"4 Registrar Account - Region 2574461 15 \n",
"5 RegistrarAccount - Country 2574461 34 \n",
"6 Product - Asset Type 2574461 6 \n",
"7 Product - Strategy 2574461 50 \n",
"8 Product - Legal Status 2574461 6 \n",
"9 Product - Is Dedie ? 2574461 2 \n",
"10 Product - Fund 2574461 70 \n",
"11 Product - Shareclass Type 2574461 11 \n",
"12 Product - Shareclass Currency 2574461 6 \n",
"13 Product - Isin 2574461 474 \n",
"14 Centralisation Date 2574461 2780 \n",
"15 Quantity - Subscription 2574461 359661 \n",
"16 Quantity - Redemption 2574461 374378 \n",
"17 Quantity - NetFlows 2574461 667586 \n",
"18 Value Ccy - Subscription 2574461 926633 \n",
"19 Value Ccy - Redemption 2574461 1296468 \n",
"20 Value Ccy - NetFlows 2574461 1972319 \n",
"21 Value € - Subscription 2574461 955890 \n",
"22 Value € - Redemption 2574461 1323531 \n",
"23 Value € - NetFlows 2574461 2018916 \n",
"\n",
" Exemples de valeurs Nan Values \n",
"0 [003, 008, 006, 009, 016, 020, 021, 022, 030, ... 0 \n",
"1 [166, 927, 968, 42, 13536, 231, 481, 91, 1038,... 1540 \n",
"2 [166, 927, 968, 42, 13536, 14977, 481, 91, 103... 1540 \n",
"3 [200127202, 406533, 411986, 200000647, 412028,... 0 \n",
"4 [France, International, Belgium, Spain, Luxemb... 0 \n",
"5 [France, Monaco, Israel, Belgium, Spain, Luxem... 0 \n",
"6 [Equity, Diversified, Fixed Income, Alternativ... 2049 \n",
"7 [Investissement, Patrimoine, Emerging Patrimoi... 6 \n",
"8 [SICAV, FCP, FCPE, ICAV, OEIC, Others] 0 \n",
"9 [NO, YES] 0 \n",
"10 [Carmignac Portfolio Investissement, Carmignac... 0 \n",
"11 [F, A, AW & AW-R, FW & FW-R, E, IW, Dedicated,... 2 \n",
"12 [EUR, USD, CHF, GBP, JPY, CAD] 0 \n",
"13 [LU0992625839, FR0010135103, FR0010148981, LU0... 0 \n",
"14 [2020-11-05, 2015-03-09, 2016-10-26, 2018-10-1... 0 \n",
"15 [1636.0, 144.69, 0.0, 160.698, 17.285, 115.0, ... 0 \n",
"16 [0.0, -8.321, -22.083, -465.992, -0.397, -36.0... 0 \n",
"17 [1636.0, 144.69, -8.321, -22.083, -465.992, 16... 0 \n",
"18 [280983.0, 99985.13, 0.0, 21851.77, 19795.99, ... 0 \n",
"19 [0.0, -9384.76, -25227.4, -563775.76, -1536.53... 0 \n",
"20 [280983.0, 99985.13, -9384.76, -25227.4, -5637... 0 \n",
"21 [280983.0, 99985.13, 0.0, 21851.77, 19795.99, ... 0 \n",
"22 [0.0, -9384.76, -25227.4, -563775.76, -1536.53... 0 \n",
"23 [280983.0, 99985.13, -9384.76, -25227.4, -5637... 0 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#dict avec valeurs unique de chaque col \n",
"rowsf = []\n",
"\n",
"for col in df_flows2.columns:\n",
" uniques = df_flows2[col].unique()\n",
" rowsf.append({\n",
" \"Colonne\": col,\n",
" \"Nbr Lignes\": df_flows2.shape[0], #4.8millions\n",
" \"Nb valeurs uniques\": len(uniques),\n",
" \"Exemples de valeurs\": uniques,\n",
" \"Nan Values\" : df_flows2[col].isna().sum()\n",
" })\n",
"\n",
"df_unique_flows = pd.DataFrame(rowsf)\n",
"df_unique_flows"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "520a5211-3735-44d8-9fe5-b2468c950606",
"metadata": {},
"outputs": [],
"source": [
"#rev pour df_flows2 et df_aum2 pas meme valeurs uniques "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e07a2b28-13f7-49f6-a55b-7d09a506407b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Colonne</th>\n",
" <th>Nbr Lignes_aum</th>\n",
" <th>Nb valeurs uniques_aum</th>\n",
" <th>Nbr Lignes_flows</th>\n",
" <th>Nb valeurs uniques_flows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Agreement - Code</td>\n",
" <td>4880297</td>\n",
" <td>2427</td>\n",
" <td>2574461</td>\n",
" <td>1611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Company - Id</td>\n",
" <td>4880297</td>\n",
" <td>1562</td>\n",
" <td>2574461</td>\n",
" <td>1231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Company - Ultimate Parent Id</td>\n",
" <td>4880297</td>\n",
" <td>1068</td>\n",
" <td>2574461</td>\n",
" <td>815</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Registrar Account - ID</td>\n",
" <td>4880297</td>\n",
" <td>12501</td>\n",
" <td>2574461</td>\n",
" <td>6842</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Registrar Account - Region</td>\n",
" <td>4880297</td>\n",
" <td>15</td>\n",
" <td>2574461</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>RegistrarAccount - Country</td>\n",
" <td>4880297</td>\n",
" <td>39</td>\n",
" <td>2574461</td>\n",
" <td>34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Product - Asset Type</td>\n",
" <td>4880297</td>\n",
" <td>6</td>\n",
" <td>2574461</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Product - Strategy</td>\n",
" <td>4880297</td>\n",
" <td>53</td>\n",
" <td>2574461</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Product - Legal Status</td>\n",
" <td>4880297</td>\n",
" <td>6</td>\n",
" <td>2574461</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Product - Is Dedie ?</td>\n",
" <td>4880297</td>\n",
" <td>2</td>\n",
" <td>2574461</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Product - Fund</td>\n",
" <td>4880297</td>\n",
" <td>74</td>\n",
" <td>2574461</td>\n",
" <td>70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Product - Shareclass Type</td>\n",
" <td>4880297</td>\n",
" <td>12</td>\n",
" <td>2574461</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Product - Shareclass Currency</td>\n",
" <td>4880297</td>\n",
" <td>6</td>\n",
" <td>2574461</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Product - Isin</td>\n",
" <td>4880297</td>\n",
" <td>491</td>\n",
" <td>2574461</td>\n",
" <td>474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Centralisation Date</td>\n",
" <td>4880297</td>\n",
" <td>130</td>\n",
" <td>2574461</td>\n",
" <td>2780</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Colonne Nbr Lignes_aum Nb valeurs uniques_aum \\\n",
"0 Agreement - Code 4880297 2427 \n",
"1 Company - Id 4880297 1562 \n",
"2 Company - Ultimate Parent Id 4880297 1068 \n",
"3 Registrar Account - ID 4880297 12501 \n",
"4 Registrar Account - Region 4880297 15 \n",
"5 RegistrarAccount - Country 4880297 39 \n",
"6 Product - Asset Type 4880297 6 \n",
"7 Product - Strategy 4880297 53 \n",
"8 Product - Legal Status 4880297 6 \n",
"9 Product - Is Dedie ? 4880297 2 \n",
"10 Product - Fund 4880297 74 \n",
"11 Product - Shareclass Type 4880297 12 \n",
"12 Product - Shareclass Currency 4880297 6 \n",
"13 Product - Isin 4880297 491 \n",
"14 Centralisation Date 4880297 130 \n",
"\n",
" Nbr Lignes_flows Nb valeurs uniques_flows \n",
"0 2574461 1611 \n",
"1 2574461 1231 \n",
"2 2574461 815 \n",
"3 2574461 6842 \n",
"4 2574461 15 \n",
"5 2574461 34 \n",
"6 2574461 6 \n",
"7 2574461 50 \n",
"8 2574461 6 \n",
"9 2574461 2 \n",
"10 2574461 70 \n",
"11 2574461 11 \n",
"12 2574461 6 \n",
"13 2574461 474 \n",
"14 2574461 2780 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_aum = df_uniques[['Colonne', 'Nbr Lignes', 'Nb valeurs uniques']]\n",
"df2_flows = df_unique_flows[['Colonne', 'Nbr Lignes', 'Nb valeurs uniques']]\n",
"\n",
"df_merged = df1_aum.merge(df2_flows, on='Colonne', suffixes=('_aum', '_flows'))\n",
"df_merged"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a673272a-d109-470e-9da9-02f61ca2550b",
"metadata": {},
"outputs": [],
"source": [
"#descriptio colonnes de df_flows2\n",
"Colonne\tSignification pour df_flows2\n",
"\n",
"1- Agreement - Code\t: Code interne de laccord/fonds (pour identifier les fonds ou stratégies)\n",
"\n",
"2- Company - Id : \tID de lintermédiaire (banque, conseiller)\n",
"3- Company - Ultimate Parent Id\t: ID de la société mère de lintermédiaire\n",
"\n",
"4- Registrar Account - ID\t: Identifiant unique du compte (le client chez le dépositaire) #hyda code client !!!\n",
"5- Registrar Account - Region / Country: \tRégion ou pays du compte\n",
"\n",
"6- Product - Asset Type / Strategy / Legal Status / Is Dedie? : \n",
"Type de produit (Equity, Diversified…), stratégie dinvestissement, type légal du fonds (SICAV, FCP), dédié ou non\n",
"\n",
"8-Product - Fund / Shareclass Type / Currency / ISIN\t:\n",
"Nom du fonds, type de shareclass, devise, ISIN unique du produit\n",
"\n",
"9- Centralisation Date : \tDate de reporting du flux\n",
"10- Quantity - Subscription / Redemption / NetFlows : \t\n",
"Nombre de parts souscrites, rachetées et flux net (Subscription - Redemption)\n",
"\n",
"11- Value Ccy / Value € - Subscription / Redemption / NetFlows\t: \n",
"Valeurs en devise du fonds et en euro pour les souscriptions, rachats et flux net \n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2152a444-4a84-426e-ac7a-4806a8d0a35e",
"metadata": {},
"outputs": [],
"source": [
"Pour faire de la segmentation client et du clustering, il faut que le dataset soit cohérent et \n",
"représentatif des clients finaux, pas juste des comptes !! JE DOIS CLEAN ET IDENTIFIER LES MISMATCH \n",
"\n",
"ANOMALIES :EXEMPLE CI DESSOUS, Y EN A D AUTRES? ??????\n",
"1- Un client change de compte / dépositaire :\n",
"\n",
"-- Compte A → flux sortants (retrait)\n",
"-- Compte B → flux entrants (nouveau compte)\n",
"Montants identiques et même ISIN / produit → même client peut etre\n",
"\n",
"2- Un compte disparaît et un autre se crée avec mêmes montants → flux continu"
]
},
{
"cell_type": "markdown",
"id": "4ce2ad22-08e6-4e63-96b2-c2301172516e",
"metadata": {},
"source": [
"# ETUDE ET ANALYSE DES ANOMALIES"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c883dfc2-b9b9-4d3e-80d3-0140cd222492",
"metadata": {},
"outputs": [],
"source": [
"df_aum2['Centralisation Date'] = pd.to_datetime(df_aum2['Centralisation Date'])\n",
"df_flows2['Centralisation Date'] = pd.to_datetime(df_flows2['Centralisation Date'])\n",
"\n",
"\n",
"key_cols = ['Registrar Account - ID', 'Product - Isin']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "64ca9883-372b-4a5e-8fc1-10e5d835c411",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Doublons AUM: 250232\n",
"Doublons Flows: 4849\n"
]
}
],
"source": [
"# Vérifier doublons exacts\n",
"doublons_aum = df_aum2[df_aum2.duplicated(subset=key_cols + ['Centralisation Date'], keep=False)]\n",
"doublons_flows = df_flows2[df_flows2.duplicated(subset=key_cols + ['Centralisation Date'], keep=False)]\n",
"\n",
"print(\"Doublons AUM:\", doublons_aum.shape[0])\n",
"print(\"Doublons Flows:\", doublons_flows.shape[0])\n",
"\n",
"#same date, code isin du produit, et account ==> revoir les autres caracteristiques "
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f47b276d-cce6-433c-87c5-860810d71d34",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Cols: ['Company - Id', 'Company - Ultimate Parent Id', 'Registrar Account - ID', 'Registrar Account - Region', 'Product - Isin']\n",
"Doublons AUM: 26452\n",
"Doublons Flows: 0\n"
]
}
],
"source": [
"cols= ['Company - Id', 'Company - Ultimate Parent Id',\n",
" 'Registrar Account - ID', 'Registrar Account - Region','Product - Isin']\n",
"\n",
"doublons_aum2 = df_aum2[df_aum2.duplicated(subset=cols + ['Centralisation Date'], keep=False)]\n",
"doublons_flows2 = df_flows2[df_flows2.duplicated(subset=cols + ['Centralisation Date'], keep=False)]\n",
"\n",
"print(\" Cols: \", cols)\n",
"print(\"Doublons AUM:\", doublons_aum2.shape[0])\n",
"print(\"Doublons Flows:\", doublons_flows2.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2f355e6-30c5-420e-a3db-9095dd5e0147",
"metadata": {},
"outputs": [],
"source": [
"# # Comptes avec same flux et same product ISIN mais IDs différents ---> candidats pseudo-client\n",
"# #plusieurs comptes diff réalisent EXACTEMENT le même flux sur le même produit le même jour.\n",
"# #date?\n",
"\n",
"# pseudo_client_candidates = df_flows2.groupby(['Product - Isin','Centralisation Date', 'Quantity - NetFlows', 'Value € - NetFlows'])['Registrar Account - ID'].nunique()\n",
"# pseudo_client_candidates = pseudo_client_candidates[pseudo_client_candidates > 1]\n",
"# print(\"Comptes potentiels à fusionner:\")\n",
"# print(pseudo_client_candidates)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff0284ad-2c28-4bb4-b6a5-92f52181e433",
"metadata": {},
"outputs": [],
"source": [
"# # Groupby pour détecter les flows identiques le même jour, sur le même ISIN\n",
"\n",
"# grouped = df_flows2.groupby(\n",
"# ['Product - Isin', 'Centralisation Date', 'Quantity - NetFlows', 'Value € - NetFlows']\n",
"# )['Registrar Account - ID'] \\\n",
"# .agg(['nunique', list]) \\\n",
"# .reset_index()\n",
"\n",
"# #plusieurs comptes différents apparaissent\n",
"# pseudo_client_candidates = grouped[grouped['nunique'] > 1]\n",
"\n",
"# print(\"Comptes potentiels à fusionner :\")\n",
"# display(pseudo_client_candidates)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3ff99523-0a79-465b-b257-ea15d6c42d2e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2574461, 24)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_flows2.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8df98c34-b1f7-4fb9-bbc7-c9bbe762022a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ISIN</th>\n",
" <th>Date</th>\n",
" <th>Compte sortant</th>\n",
" <th>Montant sortie</th>\n",
" <th>Compte entrant</th>\n",
" <th>Montant entrée</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-02</td>\n",
" <td>365247</td>\n",
" <td>-3126.05</td>\n",
" <td>200002169</td>\n",
" <td>3126.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-02</td>\n",
" <td>365888</td>\n",
" <td>-3751.26</td>\n",
" <td>365241</td>\n",
" <td>3751.26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-05</td>\n",
" <td>419848</td>\n",
" <td>-3135.85</td>\n",
" <td>200001747</td>\n",
" <td>3135.85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-05</td>\n",
" <td>417952</td>\n",
" <td>-627.17</td>\n",
" <td>200002169</td>\n",
" <td>627.17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-05</td>\n",
" <td>417937</td>\n",
" <td>-5644.53</td>\n",
" <td>406074</td>\n",
" <td>5644.53</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27875</th>\n",
" <td>LU2809794220</td>\n",
" <td>2025-10-16</td>\n",
" <td>200058108</td>\n",
" <td>-1315.00</td>\n",
" <td>200127345</td>\n",
" <td>1315.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27876</th>\n",
" <td>LU2809794220</td>\n",
" <td>2025-10-31</td>\n",
" <td>420350</td>\n",
" <td>-141.03</td>\n",
" <td>200127644</td>\n",
" <td>141.03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27877</th>\n",
" <td>LU2809794220</td>\n",
" <td>2025-11-03</td>\n",
" <td>200127356</td>\n",
" <td>-2996.91</td>\n",
" <td>420350</td>\n",
" <td>2996.91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27878</th>\n",
" <td>LU2809794493</td>\n",
" <td>2024-12-09</td>\n",
" <td>200058108</td>\n",
" <td>-97.27</td>\n",
" <td>420350</td>\n",
" <td>97.27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27879</th>\n",
" <td>LU2970252958</td>\n",
" <td>2025-04-09</td>\n",
" <td>200002082</td>\n",
" <td>-2613.34</td>\n",
" <td>200002271</td>\n",
" <td>2613.34</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>27880 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" ISIN Date Compte sortant Montant sortie Compte entrant \\\n",
"0 FR0010135103 2015-01-02 365247 -3126.05 200002169 \n",
"1 FR0010135103 2015-01-02 365888 -3751.26 365241 \n",
"2 FR0010135103 2015-01-05 419848 -3135.85 200001747 \n",
"3 FR0010135103 2015-01-05 417952 -627.17 200002169 \n",
"4 FR0010135103 2015-01-05 417937 -5644.53 406074 \n",
"... ... ... ... ... ... \n",
"27875 LU2809794220 2025-10-16 200058108 -1315.00 200127345 \n",
"27876 LU2809794220 2025-10-31 420350 -141.03 200127644 \n",
"27877 LU2809794220 2025-11-03 200127356 -2996.91 420350 \n",
"27878 LU2809794493 2024-12-09 200058108 -97.27 420350 \n",
"27879 LU2970252958 2025-04-09 200002082 -2613.34 200002271 \n",
"\n",
" Montant entrée \n",
"0 3126.05 \n",
"1 3751.26 \n",
"2 3135.85 \n",
"3 627.17 \n",
"4 5644.53 \n",
"... ... \n",
"27875 1315.00 \n",
"27876 141.03 \n",
"27877 2996.91 \n",
"27878 97.27 \n",
"27879 2613.34 \n",
"\n",
"[27880 rows x 6 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df_flows2.copy()\n",
"df[\"Date\"] = pd.to_datetime(df[\"Centralisation Date\"])\n",
"\n",
"# Groupby par ISIN et Date\n",
"grouped = df.groupby([\"Product - Isin\", \"Date\"])\n",
"\n",
"transfers = []\n",
"\n",
"for (isin, date), group in grouped:\n",
" # Sépare flux positifs et négatifs\n",
" entrants = group[group[\"Value € - NetFlows\"] > 0][[\"Registrar Account - ID\", \"Value € - NetFlows\"]]\n",
" sortants = group[group[\"Value € - NetFlows\"] < 0][[\"Registrar Account - ID\", \"Value € - NetFlows\"]]\n",
"\n",
" # On cherche des paires +M / -M\n",
" for _, row_sortie in sortants.iterrows():\n",
" montant_sortie = row_sortie[\"Value € - NetFlows\"]\n",
" compte_sortant = row_sortie[\"Registrar Account - ID\"]\n",
"\n",
" # Chercher un +M qui matche exactement le -M\n",
" match = entrants[entrants[\"Value € - NetFlows\"] == -montant_sortie]\n",
"\n",
" if len(match) > 0:\n",
" for _, row_entree in match.iterrows():\n",
" transfers.append({\n",
" \"ISIN\": isin,\n",
" \"Date\": date,\n",
" \"Compte sortant\": compte_sortant,\n",
" \"Montant sortie\": montant_sortie,\n",
" \"Compte entrant\": row_entree[\"Registrar Account - ID\"],\n",
" \"Montant entrée\": row_entree[\"Value € - NetFlows\"]\n",
" })\n",
"\n",
"\n",
"transf_compte = pd.DataFrame(transfers)\n",
"transf_compte\n",
"\n",
"#df initiale : 2 574 461 \n",
"# 27 880 rows "
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "df0c0bbb-4cff-4205-86e0-0393f83a4cc7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de comptes uniques impliqués dans les transferts : 1346\n"
]
}
],
"source": [
"# Extraire tous les comptes sortants et entrants\n",
"all_accounts = pd.concat([\n",
" transf_compte[\"Compte sortant\"],\n",
" transf_compte[\"Compte entrant\"]\n",
"])\n",
"\n",
"# Comptes uniques impliqués dans au moins un transfert\n",
"unique_accounts = all_accounts.unique()\n",
"\n",
"print(f\"Nombre de comptes uniques impliqués dans les transferts : {len(unique_accounts)}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "6dacddcb-74f1-441f-adbe-83275c8f9216",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABH8AAAG5CAYAAAAXopiMAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXecFEXax3+zkbyAEkQyoqACKp6KyplQFDMoZlHR1+wZ78RTEcOBGQPmrKeeigkVUJAgOeec47KkXTaHmX7/WGa2u6e6Ok/a39cPH3c6VD1dXV1d9fQTAoqiKCCEEEIIIYQQQgghKUlavAUghBBCCCGEEEIIIf5B5Q8hhBBCCCGEEEJICkPlDyGEEEIIIYQQQkgKQ+UPIYQQQgghhBBCSApD5Q8hhBBCCCGEEEJICkPlDyGEEEIIIYQQQkgKQ+UPIYQQQgghhBBCSApD5Q8hhBBCCCGEEEJICkPlDyGEEEJIivPZZ5/hzTffjLcYphQUFODpp5/GX3/9FW9RCCGEkJSCyh9CCCEkBtx0001o0KBBvMWIYtOmTQgEAvjkk0/iLUpcWLt2Lc477zzk5OQgEAjgxx9/jLdItmnfvj1uuukmw/0///wz7rjjDpxwwgmxE8ohgwcPxtixY3HiiSfGWxTPuOmmm9C+fft4i0EIIaSWQ+UPIYSQlOGTTz5BIBBAnTp1sH379qj9Z555Jo499tg4SFZ7KCkpwVNPPYXJkyfHWxRLDBo0CEuXLsVzzz2Hzz//3DOlw4oVK/DUU09h06ZNnpTnlE2bNmHw4MH473//i1NPPVWzL1b3aseOHXjqqaewaNEi6XGvvfYalixZgjFjxqBu3bq+ykQIIYTUNqj8IYQQknKUl5djxIgR8RajVlJSUoJhw4YlhfKntLQUM2fOxODBg3HPPffg+uuvR+vWrT0pe8WKFRg2bFjclT+LFi3Cu+++i8svvzxqX6zu1Y4dOzBs2DCp8qeiogLFxcUYN24cDj30UF/liTXvv/8+Vq9eHW8xCCGE1HKo/CGEEJJyHHfccXj//fexY8eOeIviCVVVVaioqIi3GL5QXFwct7p3794NAGjcuLFnZZaVlSEUCnlWnlsuu+wy9O/fP95imJKVlYXHHnsMHTt2jLcotlEUBaWlpYb7MzMzkZ2dHUOJqonns0UIISTxoPKHEEJIyvHYY48hGAxasv6pqqrCM888g06dOiE7Oxvt27fHY489hvLycs1x7du3x0UXXYTJkyfjxBNPRN26ddGtW7eI1cT333+Pbt26oU6dOujZsycWLlworG/Dhg3o27cv6tevj1atWuHpp5+GoiiR/eEYPC+99BJGjhwZkWvFihUAgFWrVuGKK65A06ZNUadOHZx44on4+eefLbVLfn4+brrpJuTk5KBx48YYNGgQ8vPzhcc6qWfTpk1o1qwZAGDYsGEIBAIIBAJ46qmnANTEPVq/fj369euHhg0b4rrrrgMA/PXXX7jyyivRtm1bZGdno02bNnjggQeiFtXhMrZv347LLrsMDRo0QLNmzfDwww8jGAxqjv3666/Rs2dPNGzYEI0aNUK3bt3w2muvAQCeeuoptGvXDgDwyCOPIBAIaOKybN++HbfccgtatGiB7OxsHHPMMfjoo4805U+ePBmBQABff/01Hn/8cRx++OGoV68eXn/9dVx55ZUAgLPOOivSDuG+Mm/ePPTt2xeHHnoo6tatiw4dOuCWW26Rti1QrWR49tln0bp1a9SrVw9nnXUWli9fLjw2Pz8f999/P9q0aYPs7GwcccQReP755yOKKbN7BVjvA/n5+XjggQfQvn17ZGdno3Xr1rjxxhuxZ88eTJ48GX/7298AADfffHOkHnWMqdmzZ+P8889HTk4O6tWrhzPOOAPTp0/X1FFYWIj7778/Ukfz5s1x7rnnYsGCBdI2e+qppxAIBLBq1SoMHDgQjRo1wiGHHIJ//OMfKCsr0xz78ccf4+yzz0bz5s2RnZ2No48+Gm+//XZUmeGxYPz48ZGx4N133zWUQRTzp7i4GA899FDk/hx11FF46aWXhGOBKB6X/l6Fr3PFihW49tpr0aRJE5x++unStiGEEFK7yIi3AIQQQojXdOjQATfeeCPef/99PProo2jVqpXhsbfeeis+/fRTXHHFFXjooYcwe/ZsDB8+HCtXrsQPP/ygOXbdunW49tprcfvtt+P666/HSy+9hIsvvhjvvPMOHnvsMdx1110AgOHDh2PgwIFYvXo10tJqvrMEg0Gcf/75OOWUU/DCCy9g3LhxGDp0KKqqqvD0009r6vr4449RVlaG//u//0N2djaaNm2K5cuX47TTTsPhhx+ORx99FPXr18c333yDyy67DKNHjxa69oRRFAWXXnoppk2bhjvuuANdu3bFDz/8gEGDBkUd67SeZs2a4e2338add96Jyy+/PGJx0r1798gxVVVV6Nu3L04//XS89NJLqFevHgDg22+/RUlJCe68804ccsghmDNnDt544w1s27YN3377raaeYDCIvn374uSTT8ZLL72ECRMm4OWXX0anTp1w5513AgD++OMPXHPNNTjnnHPw/PPPAwBWrlyJ6dOn4x//+Af69++Pxo0b44EHHsA111yDfv36RQJy79q1C6eccgoCgQDuueceNGvWDGPHjsXgwYNx4MAB3H///Rp5nnnmGWRlZeHhhx9GeXk5zjvvPNx33314/fXX8dhjj6Fr164AgK5duyIvLw/nnXcemjVrhkcffRSNGzfGpk2b8P333xveuzBPPvkknn32WfTr1w/9+vXDggULcN5550VZhZWUlOCMM87A9u3bcfvtt6Nt27aYMWMGhgwZgp07d2LkyJGm98pqHygqKkLv3r2xcuVK3HLLLTjhhBOwZ88e/Pzzz9i2bRu6du2Kp59+Gk8++ST+7//+D7179waASPyhP//8ExdccAF69uyJoUOHIi0tLaKE+euvv3DSSScBAO644w589913uOeee3D00Udj7969mDZtGlauXGkpkPXAgQPRvn17DB8+HLNmzcLrr7+O/fv347PPPosc8/bbb+OYY47BJZdcgoyMDIwZMwZ33XUXQqEQ7r77bk15q1evxjXXXIPbb78dt912G4466ihTGcIoioJLLrkEkyZNwuDBg3Hcccdh/PjxeOSRR7B9+3a8+uqrlsvSc+WVV6Jz5874z3/+o1EkEUIIIVAIIYSQFOHjjz9WAChz585V1q9fr2RkZCj33XdfZP8ZZ5yhHHPMMZHfixYtUgAot956q6achx9+WAGg/Pnnn5Ft7dq1UwAoM2bMiGwbP368AkCpW7eusnnz5sj2d999VwGgTJo0KbJt0KBBCgDl3nvvjWwLhULKhRdeqGRlZSm7d+9WFEVRNm7cqABQGjVqpOTl5WnkOuecc5Ru3bopZWVlmjJOPfVUpXPnztK2+fHHHxUAygsvvBDZVlVVpfTu3VsBoHz88cee1LN7924FgDJ06NCofeE2ePTRR6P2lZSURG0bPny4EggENG0bLuPpp5/WHHv88ccrPXv2jPz+xz/+oTRq1EipqqoylDXc1i+++KJm++DBg5XDDjtM2bNnj2b71VdfreTk5ERknTRpkgJA6dixY5T83377bVQfUBRF+eGHHyJ91A55eXlKVlaWcuGFFyqhUCiy/bHHHlMAKIMGDYpse+aZZ5T69esra9as0ZTx6KOPKunp6cqWLVsURZHfK6t94Mknn1QAKN9//31UGWE5586dG9XHwvs7d+6s9O3bV3NNJSUlSocOHZRzzz03si0nJ0e5++67JS0kZujQoQoA5ZJLLtFsv+uuuxQAyuLFizX16unbt6/SsWNHzbbwWDBu3DhLMgwaNEhp165d5Hf4WXz22Wc1x11xxRVKIBBQ1q1bpyhKTf/Ut5uiKFH3LXyd11xzjSWZCCGE1D7o9kUIISQl6dixI2644Qa899572Llzp/CY3377DQDw4IMParY/9NBDAIBff/1Vs/3oo49Gr169Ir9PPvlkAMDZZ5+Ntm3bRm3fsGFDVJ333HNP5O+wZUlFRQUmTJigOW7AgAERtxwA2LdvH/78808
"text/plain": [
"<Figure size 1400x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"transf_compte[\"Date\"] = pd.to_datetime(transf_compte[\"Date\"])\n",
"\n",
"# Nombre de transferts par jour\n",
"transfers_per_day = transf_compte.groupby(\"Date\").size()\n",
"\n",
"plt.figure(figsize=(14,5))\n",
"transfers_per_day.plot(kind=\"line\")\n",
"plt.title(\"Nombre de transferts détectés par jour\")\n",
"plt.xlabel(\"Date\")\n",
"plt.ylabel(\"Nombre de transferts\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "be0708c4-95df-4915-82f9-a6347187bd70",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Jours anormaux (avec beaucoup de transferts) :\n"
]
},
{
"data": {
"text/plain": [
"Date\n",
"2015-01-15 26\n",
"2015-01-16 29\n",
"2015-01-26 33\n",
"2015-01-27 26\n",
"2015-01-29 32\n",
" ..\n",
"2019-04-01 28\n",
"2021-01-04 26\n",
"2021-01-18 26\n",
"2021-02-08 27\n",
"2021-04-06 27\n",
"Length: 121, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Détection de jours anormaux (ex: > 95e percentile)\n",
"threshold = transfers_per_day.quantile(0.95)\n",
"anomalous_days = transfers_per_day[transfers_per_day > threshold]\n",
"print(\"Jours anormaux (avec beaucoup de transferts) :\")\n",
"display(anomalous_days)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "f7da5d09-7c97-4fa2-921d-886928fdf80f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Jours anormaux (weekday + month) :\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>n_transfers</th>\n",
" <th>weekday</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>2021-07-15</td>\n",
" <td>20</td>\n",
" <td>Thursday</td>\n",
" <td>July</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>2021-10-01</td>\n",
" <td>20</td>\n",
" <td>Friday</td>\n",
" <td>October</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2021-06-15</td>\n",
" <td>20</td>\n",
" <td>Tuesday</td>\n",
" <td>June</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>2021-07-08</td>\n",
" <td>21</td>\n",
" <td>Thursday</td>\n",
" <td>July</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>2021-09-01</td>\n",
" <td>21</td>\n",
" <td>Wednesday</td>\n",
" <td>September</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>2021-10-13</td>\n",
" <td>21</td>\n",
" <td>Wednesday</td>\n",
" <td>October</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>2024-02-05</td>\n",
" <td>22</td>\n",
" <td>Monday</td>\n",
" <td>February</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>2021-08-11</td>\n",
" <td>22</td>\n",
" <td>Wednesday</td>\n",
" <td>August</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>2023-05-15</td>\n",
" <td>22</td>\n",
" <td>Monday</td>\n",
" <td>May</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>2025-10-13</td>\n",
" <td>22</td>\n",
" <td>Monday</td>\n",
" <td>October</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>2021-05-03</td>\n",
" <td>23</td>\n",
" <td>Monday</td>\n",
" <td>May</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2021-01-08</td>\n",
" <td>24</td>\n",
" <td>Friday</td>\n",
" <td>January</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2021-02-04</td>\n",
" <td>25</td>\n",
" <td>Thursday</td>\n",
" <td>February</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>2021-08-05</td>\n",
" <td>25</td>\n",
" <td>Thursday</td>\n",
" <td>August</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2021-06-03</td>\n",
" <td>25</td>\n",
" <td>Thursday</td>\n",
" <td>June</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2021-01-25</td>\n",
" <td>25</td>\n",
" <td>Monday</td>\n",
" <td>January</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2021-01-18</td>\n",
" <td>26</td>\n",
" <td>Monday</td>\n",
" <td>January</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2021-01-04</td>\n",
" <td>26</td>\n",
" <td>Monday</td>\n",
" <td>January</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2021-04-06</td>\n",
" <td>27</td>\n",
" <td>Tuesday</td>\n",
" <td>April</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2021-02-08</td>\n",
" <td>27</td>\n",
" <td>Monday</td>\n",
" <td>February</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date n_transfers weekday month\n",
"22 2021-07-15 20 Thursday July\n",
"29 2021-10-01 20 Friday October\n",
"19 2021-06-15 20 Tuesday June\n",
"21 2021-07-08 21 Thursday July\n",
"26 2021-09-01 21 Wednesday September\n",
"30 2021-10-13 21 Wednesday October\n",
"42 2024-02-05 22 Monday February\n",
"25 2021-08-11 22 Wednesday August\n",
"39 2023-05-15 22 Monday May\n",
"53 2025-10-13 22 Monday October\n",
"13 2021-05-03 23 Monday May\n",
"1 2021-01-08 24 Friday January\n",
"6 2021-02-04 25 Thursday February\n",
"24 2021-08-05 25 Thursday August\n",
"17 2021-06-03 25 Thursday June\n",
"4 2021-01-25 25 Monday January\n",
"3 2021-01-18 26 Monday January\n",
"0 2021-01-04 26 Monday January\n",
"11 2021-04-06 27 Tuesday April\n",
"7 2021-02-08 27 Monday February"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"transf_compte[\"Date\"] = pd.to_datetime(transf_compte[\"Date\"])\n",
"transf_comptee = transf_compte[transf_compte[\"Date\"].dt.year >= 2021]\n",
"# Nombre de transferts par jour\n",
"transfers_per_day = transf_comptee.groupby(\"Date\").size().rename(\"n_transfers\")\n",
"\n",
"# Détection des jours anormaux (au-dessus du 95e percentile)\n",
"threshold = transfers_per_day.quantile(0.95)\n",
"anomalous_days = transfers_per_day[transfers_per_day > threshold]\n",
"\n",
"# Ajouter weekday et month\n",
"anomalous_table = anomalous_days.reset_index()\n",
"anomalous_table[\"weekday\"] = anomalous_table[\"Date\"].dt.day_name()\n",
"anomalous_table[\"month\"] = anomalous_table[\"Date\"].dt.month_name()\n",
"\n",
"pd.set_option('display.max_rows', None)\n",
"print(\"Jours anormaux (weekday + month) :\")\n",
"display(anomalous_table.sort_values(\"n_transfers\").tail(20))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "1fa564f5-5844-4a1b-bdca-b392b829d734",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre total de comptes impliqués dans au moins un transfert : 1346\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABH8AAAHXCAYAAAAlXPamAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xl81HT+P/DXTK+hpQctlJZDKAXBUg4BEeRS5KigrAv+8MJFv56Irteqi3sArorHrqyuCqKou7KKFx6sWEBFEC0gtxwi1HIILYWWHlDaTmfy+6NkmswkmWQm0zn6evrgYSeTST5JPskk7/l8Pm+LIAgCiIiIiIiIiIgoIlmDXQAiIiIiIiIiIgocBn+IiIiIiIiIiCIYgz9ERERERERERBGMwR8iIiIiIiIiogjG4A8RERERERERUQRj8IeIiIiIiIiIKIIx+ENEREREREREFMEY/CEiIiIiIiIiimAM/hARERH5obKyEo8//ji+/fbbYBeFiIiISBGDP0REpOnmm29G69atg10MDwcPHoTFYsFbb70V7KK0WN988w0sFgu++eaboKy/a9euuPnmm4NenltvvRVffPEFBg0aFND1zJkzBxaLJaDrMNtbb70Fi8WCgwcPBrsoIeu5555Dt27dEBUVhf79+we7OCEtHM8BIqJQweAPEVEIEB+QbDYbjh496vH+pZdeitzc3CCUjEJdTU0N5syZE7QATEv3wgsvYOfOnVi+fDlatWoV7OIQgBUrVmDOnDnBLoYuq1atwiOPPIJhw4bhzTffxFNPPWXasl955RUGx4mIyIXBHyKiEFJXV4enn3462MWgMFJTU4O5c+cGJfgzcuRInD17FiNHjmz2dStp7vLU19fjzJkzyM/PR9u2bZtlneTdihUrMHfu3GAXQ5evv/4aVqsVixcvxu9+9ztMmDDBtGVHYvDnz3/+M86ePRvsYhARhSUGf4iIQkj//v3x2muv4dixY8EuiikaGhpQX18f7GJQgFitVthsNlitoXE70dzliY2NxWOPPYZu3bo1y/rIfMG+RpWWlqJVq1aIjY01bZk1NTWmLSsYtMofHR0Nm83WjKVpdObMmWZfJxGR2ULjbo2IiAAAjz32GBwOh67WPw0NDfjb3/6G7OxsxMXFoWvXrnjsscdQV1cnm69r16648sor8c0332DQoEFo1aoV+vTp42opsmzZMvTp0wc2mw0DBw7Etm3bFNf3yy+/YPz48UhISECHDh3w+OOPQxAE1/viGDx///vf8c9//tNVrj179gAAfvrpJ1xzzTVITU2FzWbDoEGD8Nlnn+naLxUVFbj55puRnJyMlJQUTJ8+HRUVFYrz+rMep9OJF154wbU/2rVrh7y8PGzevNk1T3Ptd3GsJa39fvDgQbRr1w4AMHfuXFgsFlgsFlmXFz37w263Y+7cuejRowdsNhvS0tIwfPhwrF69WnN/KY2xI3ZR3LlzJ0aNGoX4+Hh0794dH374IQBg7dq1uPjii9GqVSv07NkTX375pWyZ4pgeP/30E6ZOnYqkpCSkpaXhvvvuQ21treHyAMCiRYuQnZ2NVq1aYfDgwfj2229x6aWX4tJLL3XNozY2jdoyN27ciLy8PCQnJyM+Ph6jRo3Cd999J5unuroa999/P7p27Yq4uDikp6dj7Nix2Lp1q+Z2AMD69etx0UUXwWazITs7G6+++qrifG+++SZGjx6N9PR0xMXFIScnBwsWLPCYT6yP69evx+DBg2Gz2dCtWzf85z//kc3na10AgN27d2P06NFo1aoVOnXqhCeeeAJOp1Nx3i+++AIjRoxAQkICEhMTMXHiROzevVs2T0lJCW655RZ06tQJcXFxyMzMxG9+8xvN8YNuvvlmvPzyywDgOh/EMWK0rlH19fX461//ioEDByI5ORkJCQkYMWIE1qxZI1u+dBlivYqLi8NFF12EH374wVD5LRYL3nzzTZw5c8ZVTmlLnSVLlmDgwIFo1aoVUlNTcd111+HIkSOydYjn25YtWzBy5EjEx8fjscceQ9euXbF7926sXbvWtWyxvvt6jMVzZN26dbjzzjuRlpaGpKQk/O53v8OpU6dk83766aeYOHEiOnTogLi4OGRnZ+Nvf/sbHA6HrvKrURrzR+812f3aKHIfO0zczrVr1+Luu+9Geno6OnXqpLlviIjCQXSwC0BERE2ysrLwu9/9Dq+99hr++Mc/okOHDqrz3nbbbfj3v/+Na665Bg899BA2btyIefPmYe/evfj4449l8x44cAA33HAD7rzzTkybNg1///vfcdVVV2HhwoV47LHHcPfddwMA5s2bh6lTp2Lfvn2y1hMOhwN5eXkYMmQInn32WeTn52P27NloaGjA448/LlvXm2++idraWtxxxx2Ii4tDamoqdu/ejWHDhqFjx4744x//iISEBLz//vu4+uqr8dFHH+G3v/2t6nYKgoDf/OY3WL9+Pe666y5ccMEF+PjjjzF9+nSPef1ZD9A4cO9bb72FK664ArfddhsaGhrw7bffYsOGDa7BfENpv7dr1w4LFizAjBkz8Nvf/haTJ08GAPTt29fQ/pgzZw7mzZuH2267DYMHD0ZVVRU2b96MrVu3YuzYsZr7TMmpU6dw5ZVX4rrrrsP/+3//DwsWLMB1112H//73v7j//vtx11134YYbbsBzzz2Ha665BkeOHEFiYqJsGVOnTkXXrl0xb948bNiwAS+++CJOnTrlEazwZvHixbjzzjtxySWX4P7778cvv/yCSZMmITU1FZ07dza8bUBjV50rrrgCAwcOxOzZs2G1Wl1BmG+//RaDBw8GANx111348MMPcc899yAnJwdlZWVYv3499u7diwEDBqgu/8cff8S4cePQrl07zJkzBw0NDZg9ezbat2/vMe+CBQvQu3dvTJo0CdHR0Vi+fDnuvvtuOJ1OzJw5UzbvgQMHcM011+DWW2/F9OnT8cYbb+Dmm2/GwIED0bt3bwC+14WSkhJcdtllaGhocNW1RYsWKY6D9Pbbb2P69OkYP348nnnmGdTU1GDBggUYPnw4tm3bhq5duwIApkyZgt27d+Pee+9F165dUVpaitWrV+Pw4cOuedzdeeedOHbsGFavXo23335bcR6la1RVVRVef/11XH/99bj99ttRXV2NxYsXY/z48di0aZPHQMzvvPMOqqurceedd8JiseDZZ5/F5MmT8csvvyAmJkZX+d9++20sWrQImzZtwuuvvw4AuOSSSwAATz75JP7yl79g6tSpuO2223DixAn861//wsiRI7Ft2zakpKS4ylJWVoYrrrgC1113HaZNm4b27dvj0ksvxb333ovWrVvjT3/6EwC46o+/5/s999yDlJQUzJkzB/v27cOCBQtw6NAhV6AUaAygtG7dGg8++CBat26Nr7/+Gn/9619RVVWF5557TrY8pfIbYeSabMTdd9+Ndu3a4a9//Stb/hBRZBCIiCjo3nzzTQGA8MMPPwiFhYVCdHS08Pvf/971/qhRo4TevXu7Xm/fvl0AINx2222y5fzhD38QAAhff/21a1qXLl0EAML333/vmrZy5UoBgNCqVSvh0KFDrumvvvqqAEBYs2aNa9r06dMFAMK9997rmuZ0OoWJEycKsbGxwokTJwRBEISioiIBgJCUlCSUlpbKynX55ZcLffr0EWpra2XLuOSSS4QePXpo7ptPPvlEACA8++yzrmkNDQ3CiBEjBADCm2++acp6vv76awGAbL9LlyEIobnfT5w4IQAQZs+e7VFuvfujX79+wsSJEzX3j5I1a9Z4lHvUqFECAOGdd95xTfvpp58EAILVahU2bNjgsT+kx3D27NkCAGHSpEmydd19990CAGHHjh2uaV26dBGmT5+uWp76+nohPT1d6N+/v1BXV+eab9GiRQIAYdSoUa5p4jlYVFSkuY1Op1Po0aOHMH78eFe9EARBqKmpEbKysoSxY8e6piUnJwszZ85U3X9qrr76asFms8nqyJ49e4SoqCjB/datpqbG4/Pjx48XunXrJpsm1sd169a5ppWWlgpxcXHCQw895Jrma124//77BQDCxo0bZctPTk6
"text/plain": [
"<Figure size 1400x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Aperçu :\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>comptes_uniques</th>\n",
" <th>n_comptes</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-01-02</th>\n",
" <td>{365261, 200002169, 406197, 365888, 365247, 41...</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-05</th>\n",
" <td>{312478, 364999, 364765, 419848, 417937, 41928...</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-06</th>\n",
" <td>{422250, 364999, 417937, 417622, 411426, 41933...</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-07</th>\n",
" <td>{365364, 365377, 364999, 200001895, 200000201,...</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-08</th>\n",
" <td>{405798, 364999, 417937, 348493, 365852, 24969...</td>\n",
" <td>21</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" comptes_uniques n_comptes\n",
"Date \n",
"2015-01-02 {365261, 200002169, 406197, 365888, 365247, 41... 16\n",
"2015-01-05 {312478, 364999, 364765, 419848, 417937, 41928... 31\n",
"2015-01-06 {422250, 364999, 417937, 417622, 411426, 41933... 25\n",
"2015-01-07 {365364, 365377, 364999, 200001895, 200000201,... 27\n",
"2015-01-08 {405798, 364999, 417937, 348493, 365852, 24969... 21"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#- 2) Nombre total de comptes impliqués ---------\n",
"\n",
"all_accounts = pd.concat([\n",
" transf_compte[\"Compte sortant\"],\n",
" transf_compte[\"Compte entrant\"]\n",
"]).unique()\n",
"\n",
"print(f\"Nombre total de comptes impliqués dans au moins un transfert : {len(all_accounts)}\")\n",
"\n",
"# --------- Nombre de comptes impliqués par jour ----------\n",
"\n",
"accounts_per_day = transf_compte.groupby(\"Date\").agg(\n",
" comptes_uniques=(\"Compte sortant\", lambda x: set(x)) # temp\n",
")\n",
"\n",
"# On ajoute aussi les comptes entrants\n",
"accounts_per_day[\"comptes_uniques\"] = accounts_per_day.index.map(\n",
" lambda d: set(transf_compte[transf_compte[\"Date\"] == d][\"Compte sortant\"]) |\n",
" set(transf_compte[transf_compte[\"Date\"] == d][\"Compte entrant\"])\n",
")\n",
"\n",
"accounts_per_day[\"n_comptes\"] = accounts_per_day[\"comptes_uniques\"].apply(len)\n",
"\n",
"# Plot\n",
"plt.figure(figsize=(14,5))\n",
"plt.plot(accounts_per_day.index, accounts_per_day[\"n_comptes\"], marker=\"o\")\n",
"plt.title(\"Nombre de comptes impliqués dans des transferts par jour\")\n",
"plt.xlabel(\"Date\")\n",
"plt.ylabel(\"Nombre de comptes uniques\")\n",
"plt.grid(True)\n",
"plt.show()\n",
"\n",
"print(\"Aperçu :\")\n",
"accounts_per_day.head()\n"
]
},
{
"cell_type": "markdown",
"id": "c898b0c5-0a8e-4640-bc52-9490ee80e53d",
"metadata": {},
"source": [
"# MERGE AND ANALYSIS"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce33dbf8-1c59-416a-adc4-6eb7c1ea9d8e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"df_aum2 = df_aum2.rename(columns={\n",
" \"Registrar Account - ID\": \"Account_ID\",\n",
" \"Product - Isin\": \"ISIN\",\n",
" \"Centralisation Date\": \"Date\",\n",
" \"Value - AUM €\": \"AUM_EUR\"\n",
"})\n",
"\n",
"df_flows2 = df_flows2.rename(columns={\n",
" \"Registrar Account - ID\": \"Account_ID\",\n",
" \"Product - Isin\": \"ISIN\",\n",
" \"Centralisation Date\": \"Date\",\n",
" \"Value € - NetFlows\": \"Flow_EUR\"\n",
"})\n",
"\n",
"\n",
"df_aum2[\"Date\"] = pd.to_datetime(df_aum2[\"Date\"])\n",
"df_flows2[\"Date\"] = pd.to_datetime(df_flows2[\"Date\"])\n",
"\n",
"df_aum2[\"Account_ID\"] = df_aum2[\"Account_ID\"].astype(str)\n",
"df_flows2[\"Account_ID\"] = df_flows2[\"Account_ID\"].astype(str)\n",
"\n",
"df_aum2[\"ISIN\"] = df_aum2[\"ISIN\"].str.upper()\n",
"df_flows2[\"ISIN\"] = df_flows2[\"ISIN\"].str.upper()\n",
"\n",
"\n",
"df_merged = pd.merge(\n",
" df_aum2[[\"Account_ID\", \"ISIN\", \"Date\", \"AUM_EUR\"]],\n",
" df_flows2[[\"Account_ID\", \"ISIN\", \"Date\", \"Flow_EUR\"]],\n",
" on=[\"Account_ID\", \"ISIN\", \"Date\"],\n",
" how=\"outer\"\n",
").sort_values([\"Account_ID\", \"ISIN\", \"Date\"])\n",
"\n",
"print(\"Merged dataset:\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e5d642e-5c16-4c78-8d83-075094902670",
"metadata": {},
"outputs": [],
"source": [
"df_merged"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea14866a-1ce6-4b19-9225-d725304af8ec",
"metadata": {},
"outputs": [],
"source": [
"# 2. HISTOGRAMME DES AUM (SANS ISIN)\n",
"\n",
"# We keep the mean AUM per Account \n",
"aum_by_account = (\n",
" df_merged.groupby(\"Account_ID\")[\"AUM_EUR\"]\n",
" .mean()\n",
" .dropna()\n",
")\n",
"\n",
"plt.figure(figsize=(10,6))\n",
"plt.hist(aum_by_account, bins=50)\n",
"plt.xlabel(\"Mean AUM value (€)\")\n",
"plt.ylabel(\"Number of client accounts\")\n",
"plt.title(\"Distribution of client average AUM (one value per Account_ID)\")\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f13c213e-7f72-494a-bcf0-b4cd9feee55d",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# 3. ANALYSE DES FLOWS POUR UN COMPTE DANS UN FONDS\n",
"\n",
"account_to_plot = \"YOUR_ACCOUNT_ID_HERE\"\n",
"isin_to_plot = \"YOUR_ISIN_HERE\"\n",
"\n",
"client_flows = df_merged[\n",
" (df_merged[\"Account_ID\"] == account_to_plot) &\n",
" (df_merged[\"ISIN\"] == isin_to_plot)\n",
"].sort_values(\"Date\")\n",
"\n",
"plt.figure(figsize=(12,5))\n",
"plt.plot(client_flows[\"Date\"], client_flows[\"Flow_EUR\"], marker=\"o\")\n",
"plt.axhline(0, color=\"black\", linewidth=1)\n",
"plt.xlabel(\"Date\")\n",
"plt.ylabel(\"Flow (€)\")\n",
"plt.title(f\"Flow movements for Account {account_to_plot}, ISIN {isin_to_plot}\")\n",
"plt.grid(True)\n",
"plt.show()\n",
"\n",
"###############################################################################\n",
"# 4. ANALYSE MENSUELLE DES FLOWS (ENTRANTS / SORTANTS)\n",
"###############################################################################\n",
"\n",
"df_merged[\"YearMonth\"] = df_merged[\"Date\"].dt.to_period(\"M\")\n",
"\n",
"flows_monthly = df_merged.groupby(\"YearMonth\").agg(\n",
" n_positive_flows=(\"Flow_EUR\", lambda x: (x > 0).sum()),\n",
" n_negative_flows=(\"Flow_EUR\", lambda x: (x < 0).sum()),\n",
")\n",
"\n",
"print(\"Monthly flow summary:\")\n",
"print(flows_monthly.head())\n",
"\n",
"# ---- Plot bar chart ----\n",
"flows_monthly.index = flows_monthly.index.astype(str)\n",
"\n",
"plt.figure(figsize=(14,6))\n",
"plt.bar(flows_monthly.index, flows_monthly[\"n_positive_flows\"], label=\"Positive flows (inflows)\", alpha=0.7)\n",
"plt.bar(flows_monthly.index, flows_monthly[\"n_negative_flows\"], label=\"Negative flows (outflows)\", alpha=0.7)\n",
"plt.xticks(rotation=90)\n",
"plt.xlabel(\"Year-Month\")\n",
"plt.ylabel(\"Number of accounts with flows\")\n",
"plt.title(\"Monthly number of accounts with inflows vs outflows\")\n",
"plt.legend()\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}