analyse temporelle

This commit is contained in:
Louis MORAINE 2026-04-09 14:46:41 +00:00
parent 8862b78fa3
commit 9fad80e04d
2 changed files with 3165 additions and 364 deletions

View File

@ -1806,10 +1806,179 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 240,
"id": "c697888b-cb72-4a98-86af-56647f5a5161", "id": "c697888b-cb72-4a98-86af-56647f5a5161",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_aum_qty_mean</th>\n",
" <th>log_gross_flow_qty_mean</th>\n",
" <th>frequency</th>\n",
" <th>rel_intensity_total</th>\n",
" <th>net_flow_qty_vol</th>\n",
" <th>n_tx_total</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>409</th>\n",
" <td>12.907910</td>\n",
" <td>10.377945</td>\n",
" <td>0.984615</td>\n",
" <td>10.355747</td>\n",
" <td>25238.018408</td>\n",
" <td>38671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>381</th>\n",
" <td>14.850749</td>\n",
" <td>11.109940</td>\n",
" <td>1.000000</td>\n",
" <td>3.085492</td>\n",
" <td>39047.638017</td>\n",
" <td>36949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <td>14.329599</td>\n",
" <td>10.848036</td>\n",
" <td>1.000000</td>\n",
" <td>3.998634</td>\n",
" <td>32239.885939</td>\n",
" <td>36456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>418</th>\n",
" <td>14.247808</td>\n",
" <td>11.848117</td>\n",
" <td>1.000000</td>\n",
" <td>11.796898</td>\n",
" <td>99058.868316</td>\n",
" <td>28497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>419</th>\n",
" <td>14.627385</td>\n",
" <td>11.110770</td>\n",
" <td>1.000000</td>\n",
" <td>1.484969</td>\n",
" <td>30763.896059</td>\n",
" <td>27296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>314</th>\n",
" <td>11.077486</td>\n",
" <td>7.942331</td>\n",
" <td>0.043478</td>\n",
" <td>1.000000</td>\n",
" <td>13490.257463</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>9.093884</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>10.191618</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>266</th>\n",
" <td>8.939081</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>328</th>\n",
" <td>12.580429</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>421 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" log_aum_qty_mean log_gross_flow_qty_mean frequency \\\n",
"409 12.907910 10.377945 0.984615 \n",
"381 14.850749 11.109940 1.000000 \n",
"399 14.329599 10.848036 1.000000 \n",
"418 14.247808 11.848117 1.000000 \n",
"419 14.627385 11.110770 1.000000 \n",
".. ... ... ... \n",
"314 11.077486 7.942331 0.043478 \n",
"238 9.093884 0.000000 0.000000 \n",
"198 10.191618 0.000000 0.000000 \n",
"266 8.939081 0.000000 0.000000 \n",
"328 12.580429 0.000000 0.000000 \n",
"\n",
" rel_intensity_total net_flow_qty_vol n_tx_total \n",
"409 10.355747 25238.018408 38671 \n",
"381 3.085492 39047.638017 36949 \n",
"399 3.998634 32239.885939 36456 \n",
"418 11.796898 99058.868316 28497 \n",
"419 1.484969 30763.896059 27296 \n",
".. ... ... ... \n",
"314 1.000000 13490.257463 1 \n",
"238 0.000000 0.000000 0 \n",
"198 0.000000 0.000000 0 \n",
"266 0.000000 0.000000 0 \n",
"328 0.000000 0.000000 0 \n",
"\n",
"[421 rows x 6 columns]"
]
},
"execution_count": 240,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"X_sorted = X.sort_values(by=\"n_tx_total\", ascending=False)\n", "X_sorted = X.sort_values(by=\"n_tx_total\", ascending=False)\n",
"X_sorted" "X_sorted"
@ -3208,7 +3377,6 @@
"\n", "\n",
"for start, end in windows:\n", "for start, end in windows:\n",
" # FILTRAGE : On recalcule les variables sur la période (simulation)\n", " # FILTRAGE : On recalcule les variables sur la période (simulation)\n",
" # Note : Tu dois adapter cette partie à tes données brutes par date\n",
" df_period = df_month[(df_month['month'] > start) & (df_month['month'] <= end)].copy()\n", " df_period = df_month[(df_month['month'] > start) & (df_month['month'] <= end)].copy()\n",
"\n", "\n",
" eps = 1e-9 \n", " eps = 1e-9 \n",
@ -3513,6 +3681,84 @@
"df_evo.groupby([\"cluster_2016\", \"cluster_2019\"]).size().unstack()" "df_evo.groupby([\"cluster_2016\", \"cluster_2019\"]).size().unstack()"
] ]
}, },
{
"cell_type": "code",
"execution_count": 212,
"id": "40cf3d1e-e53c-4a13-acba-c9d6cebfcca0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cluster_2019</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_2016</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>38.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>82.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cluster_2019 1 2 3\n",
"cluster_2016 \n",
"1 8.0 2.0 10.0\n",
"2 1.0 38.0 5.0\n",
"3 NaN 5.0 82.0"
]
},
"execution_count": 212,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_evo.groupby([\"cluster_2016\", \"cluster_2019\"]).size().unstack()"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 213, "execution_count": 213,

File diff suppressed because one or more lines are too long