diff --git a/data_exploration/explore.ipynb b/data_exploration/explore.ipynb
index 84590dd..6f80e6c 100644
--- a/data_exploration/explore.ipynb
+++ b/data_exploration/explore.ipynb
@@ -61,21 +61,38 @@
]
},
{
- "cell_type": "code",
- "execution_count": 9,
- "id": "83472648",
+ "cell_type": "markdown",
+ "id": "7f7d45bb",
"metadata": {},
- "outputs": [],
"source": [
- "with fs.open('s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv', 'rb') as f:\n",
- " df = pd.read_csv(f, sep =\";\")\n",
- "\n",
- "sample_df = df"
+ "### Original AUM"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
+ "id": "83472648",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_149812/3855858483.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(f, sep =\";\")\n"
+ ]
+ }
+ ],
+ "source": [
+ "with fs.open('s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n",
+ "    df = pd.read_csv(f, sep=\";\", low_memory=False)  # infer each column's dtype in one pass; chunked parsing raised a mixed-type DtypeWarning on columns 0-3\n",
+ "\n",
+ "sample_df = sample_by_blocks(df, block_size=10, num_blocks=10, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
"id": "79af063e",
"metadata": {},
"outputs": [
@@ -100,89 +117,359 @@
" \n",
" \n",
" \n",
" \n",
" \n",
- " Date \n",
- " Yld to Maturity \n",
+ " Agreement - Code \n",
+ " Company - Id \n",
+ " Company - Ultimate Parent Id \n",
+ " Registrar Account - ID \n",
+ " Registrar Account - Region \n",
+ " RegistrarAccount - Country \n",
+ " Product - Asset Type \n",
+ " Product - Strategy \n",
+ " Product - Legal Status \n",
+ " Product - Is Dedie ? \n",
+ " Product - Fund \n",
+ " Product - Shareclass Type \n",
+ " Product - Shareclass Currency \n",
+ " Product - Isin \n",
+ " Centralisation Date \n",
+ " Quantity - AUM \n",
+ " Value - AUM CCY \n",
+ " Value - AUM € \n",
"
2826 rows × 2 columns
\n", + "100 rows × 18 columns
\n", "" ], "text/plain": [ - " Date Yld to Maturity\n", - "0 31/12/2014 0.144\n", - "1 02/01/2015 -0.079\n", - "2 05/01/2015 -0.074\n", - "3 06/01/2015 -0.075\n", - "4 07/01/2015 -0.069\n", - "... ... ...\n", - "2821 16/10/2025 1.928\n", - "2822 17/10/2025 1.928\n", - "2823 20/10/2025 1.928\n", - "2824 21/10/2025 1.927\n", - "2825 22/10/2025 1.928\n", + " Agreement - Code Company - Id Company - Ultimate Parent Id \\\n", + "0 088 82.0 82.0 \n", + "1 088 82.0 82.0 \n", + "2 088 82.0 82.0 \n", + "3 088 82.0 82.0 \n", + "4 088 82.0 82.0 \n", + ".. ... ... ... \n", + "95 L652 101.0 33676.0 \n", + "96 L652 101.0 33676.0 \n", + "97 L652 101.0 33676.0 \n", + "98 L503_16 9390.0 19105.0 \n", + "99 L503_16 9390.0 19105.0 \n", "\n", - "[2826 rows x 2 columns]" + " Registrar Account - ID Registrar Account - Region \\\n", + "0 406321 France \n", + "1 406321 France \n", + "2 406321 France \n", + "3 406321 France \n", + "4 403909 France \n", + ".. ... ... \n", + "95 364535 Luxembourg \n", + "96 364535 Luxembourg \n", + "97 364535 Luxembourg \n", + "98 200037779 Belgium \n", + "99 200037779 Belgium \n", + "\n", + " RegistrarAccount - Country Product - Asset Type \\\n", + "0 France Diversified \n", + "1 France Diversified \n", + "2 France Diversified \n", + "3 France Equity \n", + "4 France Equity \n", + ".. ... ... \n", + "95 Luxembourg Equity \n", + "96 Luxembourg Equity \n", + "97 Luxembourg Equity \n", + "98 Belgium Diversified \n", + "99 Belgium Diversified \n", + "\n", + " Product - Strategy Product - Legal Status \\\n", + "0 Multi Expertise FCP \n", + "1 Multi Expertise FCP \n", + "2 Multi Expertise FCP \n", + "3 Large Cap Emerging Markets Strategy FCP \n", + "4 Investissement FCP \n", + ".. ... ... \n", + "95 Investissement FCP \n", + "96 Investissement FCP \n", + "97 Investissement FCP \n", + "98 Emerging Patrimoine SICAV \n", + "99 Emerging Patrimoine SICAV \n", + "\n", + " Product - Is Dedie ? 
Product - Fund \\\n", + "0 NO Carmignac Profil Réactif 100 \n", + "1 NO Carmignac Profil Réactif 100 \n", + "2 NO Carmignac Profil Réactif 100 \n", + "3 NO Carmignac Emergents \n", + "4 NO Carmignac Investissement \n", + ".. ... ... \n", + "95 NO Carmignac Investissement \n", + "96 NO Carmignac Investissement \n", + "97 NO Carmignac Investissement \n", + "98 NO Carmignac Portfolio Emerging Patrimoine \n", + "99 NO Carmignac Portfolio Emerging Patrimoine \n", + "\n", + " Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n", + "0 A EUR FR0010149211 \n", + "1 A EUR FR0010149211 \n", + "2 A EUR FR0010149211 \n", + "3 A EUR FR0010149302 \n", + "4 A EUR FR0010148981 \n", + ".. ... ... ... \n", + "95 A EUR FR0010148981 \n", + "96 A EUR FR0010148981 \n", + "97 A EUR FR0010148981 \n", + "98 A USD LU0592699259 \n", + "99 A USD LU0592699259 \n", + "\n", + " Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n", + "0 2020-02-29 0.0 0.00 0.0000 \n", + "1 2021-01-31 0.0 0.00 0.0000 \n", + "2 2021-05-31 0.0 0.00 0.0000 \n", + "3 2015-03-31 0.0 0.00 0.0000 \n", + "4 2020-09-30 0.0 0.00 0.0000 \n", + ".. ... ... ... ... 
\n", + "95 2017-03-31 0.0 0.00 0.0000 \n", + "96 2017-11-30 0.0 0.00 0.0000 \n", + "97 2018-02-28 0.0 0.00 0.0000 \n", + "98 2017-04-30 65.0 6820.45 6263.3270 \n", + "99 2017-09-30 65.0 6857.50 5800.6259 \n", + "\n", + "[100 rows x 18 columns]" ] }, - "execution_count": 10, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -193,12 +480,732 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, + "id": "65e2c6d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Diversified', 'Equity', nan, 'Alternative', 'Fixed Income',\n", + " 'Private Assets'], dtype=object)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Product - Asset Type\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "7f294d86", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Patrimoine', 'Investissement', 'Euro-Investissement',\n", + " 'Innovation', 'Absolute Return Europe', 'Climate Transition',\n", + " 'Euro-Entrepreneurs', 'Large Cap Emerging Markets Strategy',\n", + " 'Emerging Patrimoine', 'Grande Europe', 'Investissement Latitude',\n", + " 'Court Terme', 'Sécurité', 'Multi Expertise', 'Infotech',\n", + " 'Global Bond', 'Patrimoine Europe', 'Credit 2027',\n", + " 'Asia Discovery', 'Tech Solutions', 'Flexible Bond', 'Credit',\n", + " 'Market Neutral', 'Emerging Markets Debt',\n", + " 'Long-Short European Equities', 'Long-Short Global Equities',\n", + " 'Grandchildren', 'Inflation Solution', 'Credit 2029',\n", + " 'Credit 2031', 'Capital Cube', 'China New Economy', 'Crédit 2025',\n", + " 'Family Governed', 'Evergreen', 'Human Xperience', 'China',\n", + " 'Credit Opportunities', 'Merger Arbitrage Plus',\n", + " 'Flexible Allocation 2024', 'European Leaders', 'Merger Arbitrage',\n", + " nan, 'Mapfre Carmignac F.P.', 'Active Risk Allocation',\n", + " 'Credit Suisse Carmignac EM Multi-Asset',\n", 
+ " 'Fonditalia Carmignac Active Allocation', 'Global Active',\n", + " 'LUX IM - Carmignac Emerging Flexible Bond', 'Evolution',\n", + " 'UFF Grande Europe 0-100', 'Cross Asset Opportunities',\n", + " 'Alpha Themes'], dtype=object)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Product - Strategy\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2fca184a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Carmignac Patrimoine', 'Carmignac Investissement',\n", + " 'Carmignac Portfolio Investissement',\n", + " 'Carmignac Euro-Investissement', 'Carmignac Innovation',\n", + " 'Carmignac Absolute Return Europe',\n", + " 'Carmignac Portfolio Climate Transition',\n", + " 'Carmignac Euro-Entrepreneurs', 'Carmignac Emergents',\n", + " 'Carmignac Portfolio Emerging Patrimoine',\n", + " 'Carmignac Portfolio Grande Europe',\n", + " 'Carmignac Investissement Latitude', 'Carmignac Court Terme',\n", + " 'Carmignac Sécurité', 'Carmignac Multi Expertise',\n", + " 'Carmignac Portfolio Infotech', 'Carmignac Profil Réactif 100',\n", + " 'Carmignac Portfolio Global Bond', 'Carmignac Profil Réactif 75',\n", + " 'Carmignac Portfolio Patrimoine Europe', 'Carmignac Credit 2027',\n", + " 'Carmignac Portfolio Asia Discovery',\n", + " 'Carmignac Portfolio Tech Solutions',\n", + " 'Carmignac Portfolio Flexible Bond', 'Carmignac Portfolio Credit',\n", + " 'Carmignac Portfolio Market Neutral',\n", + " 'Carmignac Portfolio EM Debt',\n", + " 'Carmignac Portfolio Long-Short European Equities',\n", + " 'Carmignac Portfolio Long-Short Global Equities',\n", + " 'Carmignac Portfolio Patrimoine',\n", + " 'Carmignac Portfolio Euro-Entrepreneurs',\n", + " 'Carmignac Portfolio Grandchildren',\n", + " 'Carmignac Portfolio Investissement Latitude',\n", + " 'Carmignac Portfolio Emergents', 'Carmignac Portfolio Sécurité',\n", + " 'Carmignac Portfolio Inflation Solution', 
'Carmignac Credit 2029',\n", + " 'Carmignac Credit 2031', 'Carmignac Portfolio Capital Cube',\n", + " 'Carmignac Portfolio China New Economy', 'Carmignac Credit 2025',\n", + " 'Carmignac Epargne Actions Monde ISR',\n", + " 'Carmignac Portfolio Family Governed',\n", + " 'Carmignac S.A. SICAV - PART II UCI Private Evergreen',\n", + " 'Carmignac Portfolio Human Xperience',\n", + " 'Carmignac China New Economy', 'Carmignac Portfolio China',\n", + " 'Carmignac Alts ICAV Carmignac Credit Opportunities',\n", + " 'Carmignac Portfolio Merger Arbitrage Plus',\n", + " 'Carmignac Portfolio Absolute Return Europe',\n", + " 'Carmignac Portfolio Flexible Allocation 2024',\n", + " 'FP Carmignac European Leaders',\n", + " 'FP Carmignac Global Equity Compounders',\n", + " 'Carmignac Portfolio Merger Arbitrage',\n", + " 'Solys - Carmignac Equity Selection',\n", + " 'FP Carmignac Emerging Markets', 'Mapfre Carmignac F.P.',\n", + " 'FP Carmignac Global Bond',\n", + " 'Carmignac Alts ICAV European Long Short',\n", + " 'Carmignac Portfolio Active Risk Allocation',\n", + " 'FP Carmignac Emerging Patrimoine', 'FP Carmignac Patrimoine',\n", + " 'FP Carmignac Emerging Discovery',\n", + " 'Credit Suisse Carmignac Emerging Markets Multi-Asset Fund',\n", + " 'Fonditalia Carmignac Active Allocation',\n", + " 'Carmignac Global Active',\n", + " 'LUX IM - Carmignac Emerging Flexible Bond',\n", + " 'Carmignac Portfolio Evolution', 'UFF Grande Europe 0-100',\n", + " 'CFP 1', 'Carmignac Portfolio Sustainable Bond',\n", + " 'Carmignac Portfolio Cross Asset Opportunities',\n", + " 'Carmignac Portfolio Alpha Themes',\n", + " 'Carmignac Portfolio Global Market Neutral'], dtype=object)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Product - Fund\"].unique()" + ] + }, + { + "cell_type": "markdown", + "id": "6f40c922", + "metadata": {}, + "source": [ + "### Repaired AUM" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + 
"id": "b6edd4fd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_149812/221531830.py:2: DtypeWarning: Columns (2,3,4,5) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df_ = pd.read_csv(f, sep =\",\")\n" + ] + } + ], + "source": [ + "with fs.open('s3://projet-bdc-carmignac-g3/AUM_repaired.csv', 'rb') as f:\n", + " df_ = pd.read_csv(f, sep =\",\")\n", + "\n", + "sample_df_ = sample_by_blocks(df_, block_size=10, num_blocks=10, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, "id": "36ec4312", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | Unnamed: 0.1 | \n", + "Unnamed: 0 | \n", + "Agreement - Code | \n", + "Company - Id | \n", + "Company - Ultimate Parent Id | \n", + "Registrar Account - ID | \n", + "Registrar Account - Region | \n", + "RegistrarAccount - Country | \n", + "Product - Asset Type | \n", + "Product - Strategy | \n", + "... | \n", + "Product - Is Dedie ? | \n", + "Product - Fund | \n", + "Product - Shareclass Type | \n", + "Product - Shareclass Currency | \n", + "Product - Isin | \n", + "Centralisation Date | \n", + "Quantity - AUM | \n", + "Value - AUM CCY | \n", + "Value - AUM € | \n", + "repair_flag | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "474086 | \n", + "474032 | \n", + "089 | \n", + "151.0 | \n", + "877.0 | \n", + "406474 | \n", + "FRANCE | \n", + "FRANCE | \n", + "DIVERSIFIED | \n", + "MULTI EXPERTISE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC MULTI EXPERTISE | \n", + "A | \n", + "EUR | \n", + "FR0010149203 | \n", + "2015-04-30 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 1 | \n", + "474087 | \n", + "474033 | \n", + "089 | \n", + "151.0 | \n", + "877.0 | \n", + "406474 | \n", + "FRANCE | \n", + "FRANCE | \n", + "DIVERSIFIED | \n", + "MULTI EXPERTISE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC MULTI EXPERTISE | \n", + "A | \n", + "EUR | \n", + "FR0010149203 | \n", + "2015-10-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 2 | \n", + "474088 | \n", + "474034 | \n", + "089 | \n", + "151.0 | \n", + "877.0 | \n", + "406474 | \n", + "FRANCE | \n", + "FRANCE | \n", + "DIVERSIFIED | \n", + "MULTI EXPERTISE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC MULTI EXPERTISE | \n", + "A | \n", + "EUR | \n", + "FR0010149203 | \n", + "2016-03-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 3 | \n", + "474089 | \n", + "474035 | \n", + "089 | \n", + "151.0 | \n", + "877.0 | \n", + "406474 | \n", + "FRANCE | \n", + "FRANCE | \n", + "DIVERSIFIED | \n", + "MULTI EXPERTISE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC MULTI EXPERTISE | \n", + "A | \n", + "EUR | \n", + "FR0010149203 | \n", + "2016-11-30 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 4 | \n", + "474090 | \n", + "474036 | \n", + "089 | \n", + "151.0 | \n", + "877.0 | \n", + "406474 | \n", + "FRANCE | \n", + "FRANCE | \n", + "DIVERSIFIED | \n", + "MULTI EXPERTISE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC MULTI EXPERTISE | \n", + "A | \n", + "EUR | \n", + "FR0010149203 | \n", + "2017-01-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 95 | \n", + "4736134 | \n", + "4230498 | \n", + "L694_A | \n", + "15004.0 | \n", + "15004.0 | \n", + "420029 | \n", + "SWITZERLAND | \n", + "SWITZERLAND | \n", + "DIVERSIFIED | \n", + "EMERGING PATRIMOINE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC PORTFOLIO EMERGING PATRIMOINE | \n", + "A | \n", + "EUR | \n", + "LU0592698954 | \n", + "2019-09-30 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 96 | \n", + "4736135 | \n", + "4230499 | \n", + "L694_A | \n", + "15004.0 | \n", + "15004.0 | \n", + "420029 | \n", + "SWITZERLAND | \n", + "SWITZERLAND | \n", + "DIVERSIFIED | \n", + "EMERGING PATRIMOINE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC PORTFOLIO EMERGING PATRIMOINE | \n", + "A | \n", + "EUR | \n", + "LU0592698954 | \n", + "2020-05-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 97 | \n", + "4736136 | \n", + "4230500 | \n", + "L694_A | \n", + "15004.0 | \n", + "15004.0 | \n", + "420029 | \n", + "SWITZERLAND | \n", + "SWITZERLAND | \n", + "DIVERSIFIED | \n", + "PATRIMOINE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC PATRIMOINE | \n", + "A | \n", + "EUR | \n", + "FR0010135103 | \n", + "2015-08-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 98 | \n", + "4736137 | \n", + "4230501 | \n", + "L694_A | \n", + "15004.0 | \n", + "15004.0 | \n", + "420029 | \n", + "SWITZERLAND | \n", + "SWITZERLAND | \n", + "DIVERSIFIED | \n", + "PATRIMOINE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC PATRIMOINE | \n", + "A | \n", + "EUR | \n", + "FR0010135103 | \n", + "2016-07-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
| 99 | \n", + "4736138 | \n", + "4230502 | \n", + "L694_A | \n", + "15004.0 | \n", + "15004.0 | \n", + "420029 | \n", + "SWITZERLAND | \n", + "SWITZERLAND | \n", + "DIVERSIFIED | \n", + "PATRIMOINE | \n", + "... | \n", + "NO | \n", + "CARMIGNAC PATRIMOINE | \n", + "A | \n", + "EUR | \n", + "FR0010135103 | \n", + "2016-12-31 | \n", + "0.0 | \n", + "NaN | \n", + "NaN | \n", + "False | \n", + "
100 rows × 21 columns
\n", + "