Project_Carmignac/data_exploration/explore.ipynb

227 lines
6.2 KiB
Plaintext
Raw Normal View History

2025-11-25 17:27:19 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "bd938e6e",
"metadata": {},
"source": [
"**Short notebook to test connectivity with S3 services and explore the data**"
]
},
{
"cell_type": "code",
2026-02-02 11:37:16 +01:00
"execution_count": 1,
2025-11-25 17:27:19 +01:00
"id": "127753ac",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
2026-02-02 11:37:16 +01:00
"execution_count": 2,
2025-11-25 17:27:19 +01:00
"id": "ae3c64fe",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
"# Credentials are read from the environment so no secret is stored in the notebook.\n",
"fs = s3fs.S3FileSystem(\n",
"    client_kwargs={'endpoint_url': 'https://minio-simple.lab.groupe-genes.fr'},\n",
"    key=os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
"    secret=os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
"    # A session token only exists for temporary credentials: .get() yields None\n",
"    # (the s3fs default) instead of raising KeyError when the variable is unset.\n",
"    token=os.environ.get(\"AWS_SESSION_TOKEN\"),\n",
")"
]
},
{
"cell_type": "code",
2026-02-02 11:37:16 +01:00
"execution_count": 3,
2026-02-02 10:24:49 +01:00
"id": "84b9ac42",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def sample_by_blocks(df, block_size=10, num_blocks=10, random_state=None):\n",
"    \"\"\"Sample num_blocks non-overlapping blocks of block_size consecutive rows.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Frame to sample from; rows are selected by position (iloc).\n",
"    block_size : int\n",
"        Number of consecutive rows in each block.\n",
"    num_blocks : int\n",
"        Number of blocks to draw.\n",
"    random_state : optional\n",
"        Seed forwarded to np.random.default_rng; None gives a fresh random draw.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The num_blocks * block_size selected rows, blocks kept in their original\n",
"        order, with the index reset to 0..N-1.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If a size is negative, or df has fewer than num_blocks * block_size rows.\n",
"    \"\"\"\n",
"    if block_size < 0 or num_blocks < 0:\n",
"        raise ValueError(\"block_size and num_blocks must be non-negative\")\n",
"    n = len(df)\n",
"    if n < block_size:\n",
"        raise ValueError(f\"DataFrame has {n} rows, need at least {block_size}\")\n",
"    required_rows = num_blocks * block_size\n",
"    if n < required_rows:\n",
"        raise ValueError(f\"Not enough room for {num_blocks} non-overlapping blocks (need at least {required_rows} rows)\")\n",
"    rng = np.random.default_rng(random_state)\n",
"    # Drawing start rows directly can put two starts fewer than block_size rows\n",
"    # apart, which makes blocks overlap. Instead, reserve the rows every block\n",
"    # needs, pick num_blocks distinct slots among the leftover positions, then\n",
"    # shift the i-th block right by the rows taken by the i blocks before it.\n",
"    slack = n - required_rows\n",
"    slots = np.sort(rng.choice(slack + num_blocks, size=num_blocks, replace=False))\n",
"    starts = slots + np.arange(num_blocks) * (block_size - 1)\n",
"    # Broadcasting expands each start into its block of consecutive positions\n",
"    # and also handles the empty case (num_blocks == 0) without special-casing.\n",
"    indices = (starts[:, None] + np.arange(block_size)).ravel()\n",
"    return df.iloc[indices].reset_index(drop=True)\n",
"\n",
"# sample_df = sample_by_blocks(df, block_size=10, num_blocks=10, random_state=42)"
]
},
{
"cell_type": "code",
2026-02-02 12:31:08 +01:00
"execution_count": 9,
2025-11-25 17:27:19 +01:00
"id": "83472648",
"metadata": {},
2026-02-02 11:37:16 +01:00
"outputs": [],
"source": [
2026-02-02 12:31:08 +01:00
"# Read the semicolon-separated esterRates.csv straight from the bucket.\n",
"rates_path = 's3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv'\n",
"with fs.open(rates_path, 'rb') as csv_file:\n",
"    df = pd.read_csv(csv_file, sep=\";\")\n",
2026-02-02 11:37:16 +01:00
"\n",
2026-02-02 12:31:08 +01:00
"sample_df = df"
2026-02-02 11:37:16 +01:00
]
},
{
"cell_type": "code",
2026-02-02 12:31:08 +01:00
"execution_count": 10,
2026-02-02 11:37:16 +01:00
"id": "79af063e",
"metadata": {},
2025-11-25 17:27:19 +01:00
"outputs": [
{
2026-02-02 11:37:16 +01:00
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
2026-02-02 12:31:08 +01:00
" <th>Yld to Maturity</th>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2026-02-02 12:31:08 +01:00
" <td>31/12/2014</td>\n",
" <td>0.144</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2026-02-02 12:31:08 +01:00
" <td>02/01/2015</td>\n",
" <td>-0.079</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2026-02-02 12:31:08 +01:00
" <td>05/01/2015</td>\n",
" <td>-0.074</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2026-02-02 12:31:08 +01:00
" <td>06/01/2015</td>\n",
" <td>-0.075</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2026-02-02 12:31:08 +01:00
" <td>07/01/2015</td>\n",
" <td>-0.069</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
2026-02-02 12:31:08 +01:00
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
2026-02-02 12:31:08 +01:00
" <th>2821</th>\n",
" <td>16/10/2025</td>\n",
" <td>1.928</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
2026-02-02 12:31:08 +01:00
" <th>2822</th>\n",
" <td>17/10/2025</td>\n",
" <td>1.928</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
2026-02-02 12:31:08 +01:00
" <th>2823</th>\n",
" <td>20/10/2025</td>\n",
" <td>1.928</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
2026-02-02 12:31:08 +01:00
" <th>2824</th>\n",
" <td>21/10/2025</td>\n",
" <td>1.927</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" <tr>\n",
2026-02-02 12:31:08 +01:00
" <th>2825</th>\n",
" <td>22/10/2025</td>\n",
" <td>1.928</td>\n",
2026-02-02 11:37:16 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2026-02-02 12:31:08 +01:00
"<p>2826 rows × 2 columns</p>\n",
2026-02-02 11:37:16 +01:00
"</div>"
],
"text/plain": [
2026-02-02 12:31:08 +01:00
" Date Yld to Maturity\n",
"0 31/12/2014 0.144\n",
"1 02/01/2015 -0.079\n",
"2 05/01/2015 -0.074\n",
"3 06/01/2015 -0.075\n",
"4 07/01/2015 -0.069\n",
"... ... ...\n",
"2821 16/10/2025 1.928\n",
"2822 17/10/2025 1.928\n",
"2823 20/10/2025 1.928\n",
"2824 21/10/2025 1.927\n",
"2825 22/10/2025 1.928\n",
2026-02-02 11:37:16 +01:00
"\n",
2026-02-02 12:31:08 +01:00
"[2826 rows x 2 columns]"
2026-02-02 11:37:16 +01:00
]
},
2026-02-02 12:31:08 +01:00
"execution_count": 10,
2026-02-02 11:37:16 +01:00
"metadata": {},
"output_type": "execute_result"
2025-11-25 17:27:19 +01:00
}
],
"source": [
2026-02-02 11:37:16 +01:00
"sample_df"
2026-02-02 10:24:49 +01:00
]
},
{
"cell_type": "code",
2026-02-02 12:31:08 +01:00
"execution_count": 11,
2026-02-02 10:24:49 +01:00
"id": "36ec4312",
"metadata": {},
"outputs": [],
"source": [
2026-02-02 12:31:08 +01:00
"# Write the frame to a local CSV file, leaving out the index column.\n",
"sample_df.to_csv(path_or_buf='str_Rates.csv', index=False)"
2025-11-25 17:27:19 +01:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2026-02-02 10:24:49 +01:00
"version": "3.13.11"
2025-11-25 17:27:19 +01:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}