diff --git a/Data_Exploration/explore.ipynb b/Data_Exploration/explore.ipynb deleted file mode 100644 index 5fdb768..0000000 --- a/Data_Exploration/explore.ipynb +++ /dev/null @@ -1,57 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "bd938e6e", - "metadata": {}, - "source": [ - "**Short notebook to test connectivity with S3 services and explore the data**" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ae3c64fe", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import s3fs\n", - "fs = s3fs.S3FileSystem(\n", - " client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n", - " key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n", - " secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n", - " token = os.environ[\"AWS_SESSION_TOKEN\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83472648", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/data_exploration/explore.ipynb b/data_exploration/explore.ipynb new file mode 100644 index 0000000..113683b --- /dev/null +++ b/data_exploration/explore.ipynb @@ -0,0 +1,685 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bd938e6e", + "metadata": {}, + "source": [ + "**Short notebook to test connectivity with S3 services and explore the data**" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "127753ac", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ae3c64fe", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import s3fs\n", + "fs = s3fs.S3FileSystem(\n", + " client_kwargs={'endpoint_url': 'https://'+'minio-simple.lab.groupe-genes.fr'},\n", + " key = os.environ[\"AWS_ACCESS_KEY_ID\"], \n", + " secret = os.environ[\"AWS_SECRET_ACCESS_KEY\"], \n", + " token = os.environ[\"AWS_SESSION_TOKEN\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "83472648", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_15067/1129287322.py:2: DtypeWarning: Columns (0,1,2,3) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(f, sep=\";\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Agreement - CodeCompany - IdCompany - Ultimate Parent IdRegistrar Account - IDRegistrar Account - RegionRegistrarAccount - CountryProduct - Asset TypeProduct - StrategyProduct - Legal StatusProduct - Is Dedie ?Product - FundProduct - Shareclass TypeProduct - Shareclass CurrencyProduct - IsinCentralisation DateQuantity - AUMValue - AUM CCYValue - AUM €
03166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-03-3135.36824648.666624648.6666
13166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-11-3035.36822413.055322413.0553
23166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-12-3135.36822051.240622051.2406
33166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032016-03-3135.36821626.117321626.1173
43166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032016-11-3035.36822489.450222489.4502
53166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032017-06-3035.36823225.458223225.4582
63166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032017-08-3135.36822964.088722964.0887
73166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032018-03-3135.36822692.462522692.4625
83166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032018-05-3135.36822749.758622749.7586
93166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032019-02-2835.36820875.962020875.9620
103166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032019-08-310.0000.00000.0000
113166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-01-3135.36823359.149323359.1493
123166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032015-05-3135.36824373.857224373.8572
133166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032016-05-3135.36821974.845821974.8458
143166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032019-07-310.0000.00000.0000
153166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032020-02-290.0000.00000.0000
163166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032020-05-310.0000.00000.0000
173166.0166.0200000647FranceFranceDiversifiedPatrimoineFCPNOCarmignac PatrimoineAEURFR00101351032021-04-300.0000.00000.0000
183166.0166.0200000647FranceFranceEquityInvestissementFCPNOCarmignac InvestissementAEURFR00101489812015-06-30193.970242968.7617242968.7617
193166.0166.0200000647FranceFranceEquityInvestissementFCPNOCarmignac InvestissementAEURFR00101489812016-03-31193.970206161.0145206161.0145
\n", + "
" + ], + "text/plain": [ + " Agreement - Code Company - Id Company - Ultimate Parent Id \\\n", + "0 3 166.0 166.0 \n", + "1 3 166.0 166.0 \n", + "2 3 166.0 166.0 \n", + "3 3 166.0 166.0 \n", + "4 3 166.0 166.0 \n", + "5 3 166.0 166.0 \n", + "6 3 166.0 166.0 \n", + "7 3 166.0 166.0 \n", + "8 3 166.0 166.0 \n", + "9 3 166.0 166.0 \n", + "10 3 166.0 166.0 \n", + "11 3 166.0 166.0 \n", + "12 3 166.0 166.0 \n", + "13 3 166.0 166.0 \n", + "14 3 166.0 166.0 \n", + "15 3 166.0 166.0 \n", + "16 3 166.0 166.0 \n", + "17 3 166.0 166.0 \n", + "18 3 166.0 166.0 \n", + "19 3 166.0 166.0 \n", + "\n", + " Registrar Account - ID Registrar Account - Region \\\n", + "0 200000647 France \n", + "1 200000647 France \n", + "2 200000647 France \n", + "3 200000647 France \n", + "4 200000647 France \n", + "5 200000647 France \n", + "6 200000647 France \n", + "7 200000647 France \n", + "8 200000647 France \n", + "9 200000647 France \n", + "10 200000647 France \n", + "11 200000647 France \n", + "12 200000647 France \n", + "13 200000647 France \n", + "14 200000647 France \n", + "15 200000647 France \n", + "16 200000647 France \n", + "17 200000647 France \n", + "18 200000647 France \n", + "19 200000647 France \n", + "\n", + " RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n", + "0 France Diversified Patrimoine \n", + "1 France Diversified Patrimoine \n", + "2 France Diversified Patrimoine \n", + "3 France Diversified Patrimoine \n", + "4 France Diversified Patrimoine \n", + "5 France Diversified Patrimoine \n", + "6 France Diversified Patrimoine \n", + "7 France Diversified Patrimoine \n", + "8 France Diversified Patrimoine \n", + "9 France Diversified Patrimoine \n", + "10 France Diversified Patrimoine \n", + "11 France Diversified Patrimoine \n", + "12 France Diversified Patrimoine \n", + "13 France Diversified Patrimoine \n", + "14 France Diversified Patrimoine \n", + "15 France Diversified Patrimoine \n", + "16 France Diversified Patrimoine \n", + "17 France Diversified Patrimoine \n", + "18 France Equity Investissement \n", + "19 France Equity Investissement \n", + "\n", + " Product - Legal Status Product - Is Dedie ? Product - Fund \\\n", + "0 FCP NO Carmignac Patrimoine \n", + "1 FCP NO Carmignac Patrimoine \n", + "2 FCP NO Carmignac Patrimoine \n", + "3 FCP NO Carmignac Patrimoine \n", + "4 FCP NO Carmignac Patrimoine \n", + "5 FCP NO Carmignac Patrimoine \n", + "6 FCP NO Carmignac Patrimoine \n", + "7 FCP NO Carmignac Patrimoine \n", + "8 FCP NO Carmignac Patrimoine \n", + "9 FCP NO Carmignac Patrimoine \n", + "10 FCP NO Carmignac Patrimoine \n", + "11 FCP NO Carmignac Patrimoine \n", + "12 FCP NO Carmignac Patrimoine \n", + "13 FCP NO Carmignac Patrimoine \n", + "14 FCP NO Carmignac Patrimoine \n", + "15 FCP NO Carmignac Patrimoine \n", + "16 FCP NO Carmignac Patrimoine \n", + "17 FCP NO Carmignac Patrimoine \n", + "18 FCP NO Carmignac Investissement \n", + "19 FCP NO Carmignac Investissement \n", + "\n", + " Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n", + "0 A EUR FR0010135103 \n", + "1 A EUR FR0010135103 \n", + "2 A EUR FR0010135103 \n", + "3 A EUR FR0010135103 \n", + "4 A EUR FR0010135103 \n", + "5 A EUR FR0010135103 \n", + "6 A EUR FR0010135103 \n", + "7 A EUR FR0010135103 \n", + "8 A EUR FR0010135103 \n", + "9 A EUR FR0010135103 \n", + "10 A EUR FR0010135103 \n", + "11 A EUR FR0010135103 \n", + "12 A EUR FR0010135103 \n", + "13 A EUR FR0010135103 \n", + "14 A EUR FR0010135103 \n", + "15 A EUR FR0010135103 \n", + "16 A EUR FR0010135103 \n", + "17 A EUR FR0010135103 \n", + "18 A EUR FR0010148981 \n", + "19 A EUR FR0010148981 \n", + "\n", + " Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n", + "0 2015-03-31 35.368 24648.6666 24648.6666 \n", + "1 2015-11-30 35.368 22413.0553 22413.0553 \n", + "2 2015-12-31 35.368 22051.2406 22051.2406 \n", + "3 2016-03-31 35.368 21626.1173 21626.1173 \n", + "4 2016-11-30 35.368 22489.4502 22489.4502 \n", + "5 2017-06-30 35.368 23225.4582 23225.4582 \n", + "6 2017-08-31 35.368 22964.0887 22964.0887 \n", + "7 2018-03-31 35.368 22692.4625 22692.4625 \n", + "8 2018-05-31 35.368 22749.7586 22749.7586 \n", + "9 2019-02-28 35.368 20875.9620 20875.9620 \n", + "10 2019-08-31 0.000 0.0000 0.0000 \n", + "11 2015-01-31 35.368 23359.1493 23359.1493 \n", + "12 2015-05-31 35.368 24373.8572 24373.8572 \n", + "13 2016-05-31 35.368 21974.8458 21974.8458 \n", + "14 2019-07-31 0.000 0.0000 0.0000 \n", + "15 2020-02-29 0.000 0.0000 0.0000 \n", + "16 2020-05-31 0.000 0.0000 0.0000 \n", + "17 2021-04-30 0.000 0.0000 0.0000 \n", + "18 2015-06-30 193.970 242968.7617 242968.7617 \n", + "19 2016-03-31 193.970 206161.0145 206161.0145 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with fs.open('s3://projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv', 'rb') as f:\n", + " df = pd.read_csv(f, sep=\";\")\n", + "\n", + "df.head(20)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}