This commit is contained in:
Louis MORAINE 2025-12-04 08:48:07 +00:00
parent 224d45b166
commit 7b47c1d61e
4 changed files with 2268 additions and 8 deletions

View File

@ -0,0 +1,551 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "f996e528-002f-4856-a67a-5120e8af86ad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
]
}
],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"import s3fs\n",
"\n",
"\n",
"def select_csv_files(paths, keyword):\n",
"    \"\"\"Return the entries of `paths` that contain `keyword` and end with '.csv'.\"\"\"\n",
"    return [p for p in paths if keyword in p and p.endswith(\".csv\")]\n",
"\n",
"\n",
"def read_csv_files(filesystem, paths):\n",
"    \"\"\"Read every ';'-separated CSV in `paths` through `filesystem`.\n",
"\n",
"    Returns a dict mapping each file's base name to its DataFrame.\n",
"    \"\"\"\n",
"    frames = {}\n",
"    for path in paths:\n",
"        with filesystem.open(path, 'r') as handle:\n",
"            frames[os.path.basename(path)] = pd.read_csv(handle, sep=';', low_memory=False)\n",
"    return frames\n",
"\n",
"\n",
"# The S3 endpoint host is read from the environment (KeyError if AWS_S3_ENDPOINT is unset).\n",
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc-data\"\n",
"carmignac_path = f\"{BUCKET}/carmignac\"\n",
"\n",
"# List the folder once; both file groups are filtered from the same listing.\n",
"all_files = fs.ls(carmignac_path)\n",
"\n",
"# FLOWS files, keyed by base name\n",
"flows_files = select_csv_files(all_files, \"Flows\")\n",
"print(\"Fichiers Flows :\", flows_files)\n",
"flows_data = read_csv_files(fs, flows_files)\n",
"\n",
"# AUM files, keyed by base name\n",
"aum_files = select_csv_files(all_files, \"AUM\")\n",
"print(\"Fichiers AUM :\", aum_files)\n",
"aum_data = read_csv_files(fs, aum_files)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "bdfc2afe-c3aa-41b6-bb40-3a7bf2a39d9a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>...</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2020-11-05</td>\n",
" <td>1636.00</td>\n",
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-03-09</td>\n",
" <td>144.69</td>\n",
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2016-10-26</td>\n",
" <td>0.00</td>\n",
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2018-10-18</td>\n",
" <td>0.00</td>\n",
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2019-04-08</td>\n",
" <td>0.00</td>\n",
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"\n",
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
"0 SICAV NO ... 2020-11-05 \n",
"1 FCP NO ... 2015-03-09 \n",
"2 FCP NO ... 2016-10-26 \n",
"3 FCP NO ... 2018-10-18 \n",
"4 FCP NO ... 2019-04-08 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Work on the V2 Flows extract; `df` is the name the account-ID comparison cell reads.\n",
"flows_v2_key = 'Flows ENSAE V2 -20251105.csv'\n",
"df = flows_data[flows_v2_key]\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6af3bcd9-0a54-4087-a8cf-203fb6f8a947",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>24648.6666</td>\n",
" <td>24648.6666</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22413.0553</td>\n",
" <td>22413.0553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>22051.2406</td>\n",
" <td>22051.2406</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>21626.1173</td>\n",
" <td>21626.1173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22489.4502</td>\n",
" <td>22489.4502</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Work on the V2 AUM extract; `dg` is the name the account-ID comparison cell reads.\n",
"aum_v2_key = 'AUM ENSAE V2 -20251105.csv'\n",
"dg = aum_data[aum_v2_key]\n",
"dg.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d9cd20d-2e79-47b8-91f1-09fee4962dc6",
"metadata": {},
"outputs": [],
"source": [
"ACCOUNT_ID_COL = 'Registrar Account - ID'\n",
"\n",
"# Drop missing IDs first: NaN never compares equal to itself, so a missing value\n",
"# could otherwise be reported as an account present in only one file.\n",
"df_ids = set(df[ACCOUNT_ID_COL].dropna().unique())\n",
"dg_ids = set(dg[ACCOUNT_ID_COL].dropna().unique())\n",
"\n",
"intersect = df_ids & dg_ids  # accounts present in both\n",
"only_df = df_ids - dg_ids    # accounts only in df (flows)\n",
"only_dg = dg_ids - df_ids    # accounts only in dg (AUM)\n",
"\n",
"# Show the size of each group so the comparison produces a visible result.\n",
"len(intersect), len(only_df), len(only_dg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a374690-119e-4bbb-a12d-13d0305780ad",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

616
data_flux.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "126c8a80-d9ad-4816-84f0-0c3d580f62c8", "id": "126c8a80-d9ad-4816-84f0-0c3d580f62c8",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -12,7 +12,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "ff2261fb-9516-4410-b42d-3acc8dc1a460", "id": "ff2261fb-9516-4410-b42d-3acc8dc1a460",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -32,7 +32,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 3,
"id": "dc546698-76dc-4eaf-b9e2-7602953bf8f5", "id": "dc546698-76dc-4eaf-b9e2-7602953bf8f5",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -156,7 +156,7 @@
"4 May 2015 23550000.0 204625.93 " "4 May 2015 23550000.0 204625.93 "
] ]
}, },
"execution_count": 8, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }