Project_Carmignac/rupture.ipynb

125 lines
59 KiB
Plaintext
Raw Normal View History

2025-12-04 16:11:38 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "132a1aa1-4cb9-49e7-9f45-c09dd8fd57c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
]
}
],
"source": [
"import os\n",
"import s3fs\n",
"import pandas as pd\n",
"\n",
"# The S3 endpoint host is read from the environment; a missing variable raises KeyError here.\n",
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc-data\"\n",
"# Derive the folder from BUCKET so the bucket name is written in exactly one place.\n",
"carmignac_path = f\"{BUCKET}/carmignac\"\n",
"\n",
"# List the AUM CSV files. The \"AUM\" test is applied to the file name only,\n",
"# so a folder whose name happens to contain \"AUM\" cannot cause a false match.\n",
"all_files = fs.ls(carmignac_path)\n",
"aum_files = [\n",
"    path for path in all_files\n",
"    if \"AUM\" in os.path.basename(path) and path.endswith(\".csv\")\n",
"]\n",
"print(\"Fichiers AUM :\", aum_files)\n",
"\n",
"# Read every AUM file into a dict keyed by file name (without the folder part).\n",
"aum_data = {}\n",
"for file_path in aum_files:\n",
"    # The handle is closed by the context manager as soon as the CSV is parsed.\n",
"    with fs.open(file_path, 'r') as handle:\n",
"        aum_data[os.path.basename(file_path)] = pd.read_csv(handle, sep=';', low_memory=False)\n",
"\n",
"# Working dataset used by the following cells; raises KeyError if this file is not in the folder.\n",
"df = aum_data['AUM ENSAE V2 -20251105.csv']"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "076209a7-f6b3-4b78-9179-e24ff38338e0",
"metadata": {},
"outputs": [],
"source": [
"from detection_rupture import detect_ruptures"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "14c59f0f-676b-4d76-878a-1176cadfc9b1",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9sAAAHqCAYAAAAUMF39AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAApi1JREFUeJzs3Xd4U+XbB/DvyWg66KAUOqBAWbKHgCwRlCmiMhTcKM6f4sIFKio4EFy4cYuvCwfgBioiyBCZMmW2rA4oXXSlGef9Izmn2U3StDkh3891eUlO1pPkaZI79/3cjyCKoggiIiIiIiIiChhVsAdAREREREREdK5hsE1EREREREQUYAy2iYiIiIiIiAKMwTYRERERERFRgDHYJiIiIiIiIgowBttEREREREREAcZgm4iIiIiIiCjAGGwTERERERERBRiDbSIiIiIiIqIAY7BNREREDW7nzp145plncPz48WAPpVafffYZ3nrrrWAPg4iIQgyDbSIiqtXNN9+MRo0aBXsYTrKzsyEIAj799NNgD4VsPPPMMxAEwe35JSUlGD9+PIqKipCent6AI/Pdjz/+iLvuugvnn39+sIeiGPy7IyLyDoNtIiKF+PTTTyEIAiIjI3Hy5Emn84cOHYquXbsGYWRUX3JycvDMM89gx44dwR5Kg7rlllvQq1cvvPbaa07nffnll1iwYEG9j+Gdd96pNVjMzs7Grbfeii+++AIDBw6s9zGFgw0bNuCZZ55BcXFxsIdCRFTvGGwTESmMXq/Hiy++GOxhUAPIycnB7NmzwyrYzs7ORp8+ffD5559DpXL+GqKkYHvHjh147733MH78+HofT7jYsGEDZs+ezWCbiMICg20iIoXp2bMnPvjgA+Tk5AR7KAFhNBpRXV0d7GH4LRTHX1VVBbPZHOxhuNS6dWs8/vjjiIyMDPZQajVu3DhMmDAh2MOoV0qeK0REoY7BNhGRwjz++OMwmUxeZbeNRiOeffZZtG3bFjqdTg5k9Hq93eVat26NsWPH4s8//0SfPn0QFRWFbt264c8//wQALFmyBN26dUNkZCR69+6N7du3u7y/I0eOYNSoUYiJiUFaWhrmzJkDURTl86W1nC+//DIWLFggj2vv3r0AgP/++w9XXXUVEhMTERkZiT59+uDHH3/06nkpLi7GzTffjPj4eCQkJGDKlClus2P+3o+n8Utl/tnZ2XbX+fPPPyEIgvxcAjUl/1u3bsXAgQMRFRWFjIwMLFy40O56ffv2BWApqxYEwW4dbOvWrXHzzTc7jXHo0KEYOnSo0/1//fXXePLJJ9G8eXNER0ejtLQUALBp0yaMHj0a8fHxiI6OxpAhQ7B+/Xq72zx79iweeOABtG7dGjqdDs2aNcOIESOwbdu2Wp+zdevWoW/fvoiMjETbtm3x3nvvub3s559/jt69eyMqKgqJiYm45ppr7BqkDR06FL/88guOHj0qPx+tW7eWz9fr9Xj66afRrl076HQ6pKen49FHH3Wa79J9XXDBBYiOjkbjxo1x0UUXYeXKlfJzu2fPHqxZs0a+H9vntLi4GA888ADS09Oh0+nQrl07zJs3zykoNZvNWLBgAbp06YLIyEgkJyfjzjvvRFFRkd3ltmzZglGjRiEpKUmeC1OnTq31uZX+bleuXImePXsiMjISnTt3xpIlS+wuV1hYiIcffhjdunVDo0aNEBcXh0svvRT//vuv3eVqmyuuePt3t3PnTtx8881o06YNIiMjkZKSgqlTp+LMmTPyZZ555hk88sgjAICMjAz5ubf9m6ptjhARhRJNsAdARET2MjIycNNNN+GDDz7AjBkzkJaW5vayt912GxYtWoSrrroKDz30EDZt2oS5c+di3759WLp0qd1lDx06hOuuuw533nknbrjhBrz88su4/PLLsXDhQjz++OO4++67AQBz587FpEmTsH//frsyX5PJhNGjR6N///6YP38+li9fjqeffhpGoxFz5syxu6
9PPvkEVVVVuOOOO6DT6ZCYmIg9e/Zg0KBBaN68OWbMmIGYmBh88803GDduHL7//nuPpbqiKOLKK6/EunXrcNddd6FTp05YunQppkyZ4nTZutyPp/H7qqioCGPGjMGkSZNw7bXX4ptvvsH//vc/REREYOrUqejUqRPmzJmDp556CnfccQcGDx4MAH6vDX722WcRERGBhx9+GHq9HhEREfjjjz9w6aWXonfv3nj66aehUqnwySef4JJLLsFff/2FCy64AABw11134bvvvsO0adPQuXNnnDlzBuvWrcO+ffs8NgbbtWsXRo4ciaZNm+KZZ56B0WjE008/jeTkZKfLPv/885g1axYmTZqE2267DadPn8abb76Jiy66CNu3b0dCQgKeeOIJlJSU4MSJE/J6bqkxn9lsxhVXXIF169bhjjvuQKdOnbBr1y689tprOHDgAJYtWybf1+zZs/HMM89g4MCBmDNnDiIiIrBp0yb88ccfGDlyJBYsWIB7770XjRo1whNPPAEA8pgrKiowZMgQnDx5EnfeeSdatmyJDRs2YObMmcjNzbUrcb/zzjvx6aef4pZbbsF9992HrKwsvPXWW9i+fTvWr18PrVaLU6dOyc/RjBkzkJCQgOzsbKeA2Z2DBw9i8uTJuOuuuzBlyhR88sknuPrqq7F8+XKMGDECgOVHsGXLluHqq69GRkYG8vPz8d5772HIkCHYu3ev03uIq7niii9/d5mZmThy5AhuueUWpKSkYM+ePXj//fexZ88e/P333xAEARMmTMCBAwfw1Vdf4bXXXkNSUhIAoGnTpl7PESKikCISEZEifPLJJyIAcfPmzeLhw4dFjUYj3nffffL5Q4YMEbt06SKf3rFjhwhAvO222+xu5+GHHxYBiH/88Yd8rFWrViIAccOGDfKxFStWiADEqKgo8ejRo/Lx9957TwQgrl69Wj42ZcoUEYB47733ysfMZrN42WWXiREREeLp06dFURTFrKwsEYAYFxcnnjp1ym5cw4YNE7t16yZWVVXZ3cbAgQPF9u3be3xuli1bJgIQ58+fLx8zGo3i4MGDRQDiJ598EpD78TR+6fXJysqyO7569Wqn52vIkCEiAPGVV16Rj+n1erFnz55is2bNxOrqalEURXHz5s1O45e0atVKnDJlitPxIUOGiEOGDHG6/zZt2ogVFRV2j7l9+/biqFGjRLPZLB+vqKgQMzIyxBEjRsjH4uPjxXvuucfTU+PSuHHjxMjISLv5s3fvXlGtVou2XzGys7NFtVotPv/883bX37Vrl6jRaOyOX3bZZWKrVq2c7uv//u//RJVKJf711192xxcuXCgCENevXy+KoigePHhQVKlU4vjx40WTyWR3WdvnoUuXLnbPo+TZZ58VY2JixAMHDtgdnzFjhqhWq8Vjx46JoiiKf/31lwhA/OKLL+wut3z5crvjS5culf+ufSX93X7//ffysZKSEjE1NVXs1auXfKyqqsrpsWZlZYk6nU6cM2eOfMzdXHHHl787V7f31VdfiQDEtWvXysdeeukll39HvswRIqJQwTJyIiIFatOmDW688Ua8//77yM3NdXmZX3/9FQAwffp0u+MPPfQQAOCXX36xO965c2cMGDBAPt2vXz8AwCWXXIKWLVs6HT9y5IjTfU6bNk3+tyAImDZtGqqrq/H777/bXW7ixIlytgqwlLn+8ccfmDRpEs6ePYuCggIUFBTgzJkzGDVqFA4ePOiyA7vtY9VoNPjf//4nH1Or1bj33nvtLlfX+3E3fn9oNBrceeed8umIiAjceeedOHXqFLZu3Vqn23ZlypQpiIqKkk/v2LEDBw8exHXXXYczZ87Iz0V5eTmGDRuGtWvXymXRCQkJ2LRpk099AkwmE1asWIFx48bZzZ9OnTph1KhRdpddsmQJzGYzJk2aJI+joKAAKSkpaN++PVavXl3r/X377bfo1KkTOnbsaHcbl1xyCQDIt7Fs2TKYzWY89dRTTg3YPG1HZns/gw
cPRuPGje3uZ/jw4TCZTFi7dq18ufj4eIwYMcLucr1790ajRo3k8UjZ2J9//hkGg6HW+3eUlpZmV40RFxeHm266Cdu
"text/plain": [
"<Figure size 1000x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"# NOTE(review): 0.10 is passed as the second positional argument of detect_ruptures;\n",
"# presumably a detection threshold - confirm against detection_rupture.\n",
"ruptures_df = detect_ruptures(df, 0.10)\n",
"\n",
"# Work on a new frame so ruptures_df itself is left untouched.\n",
"df_plot = ruptures_df.assign(date=pd.to_datetime(ruptures_df['date']))\n",
"\n",
"# Number of rows returned by detect_ruptures for each date.\n",
"rows_per_date = df_plot.groupby('date').size()\n",
"counts = rows_per_date.reset_index(name='rupture_count')\n",
"\n",
"fig, ax = plt.subplots(figsize=(10, 5))\n",
"ax.plot(counts['date'], counts['rupture_count'], marker='.')\n",
"ax.set_xlabel(\"Date\")\n",
"ax.set_ylabel(\"Nombre de ruptures\")\n",
"ax.set_title(\"Nombre de ruptures détectées par date\")\n",
"ax.grid(True)\n",
"fig.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d571dba-c435-477b-a0a1-2550908c8f4a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}