defi cleaning
This commit is contained in:
parent
56c4bd8a2f
commit
636c1acb8d
634
.ipynb_checkpoints/Challenge_cleaning-checkpoint.ipynb
Normal file
634
.ipynb_checkpoints/Challenge_cleaning-checkpoint.ipynb
Normal file
|
|
@ -0,0 +1,634 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "4287b380-5359-49c7-ab95-ad346a3ad17a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
|
||||
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import des données\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"import s3fs\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||||
"\n",
|
||||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
|
||||
"\n",
|
||||
"BUCKET = \"projet-bdc-data\"\n",
|
||||
"carmignac_path = \"projet-bdc-data/carmignac\"\n",
|
||||
"\n",
|
||||
"# Liste des fichiers FLOWS\n",
|
||||
"all_files = fs.ls(carmignac_path)\n",
|
||||
"flows_files = [f for f in all_files if \"Flows\" in f and f.endswith(\".csv\")]\n",
|
||||
"print(\"Fichiers Flows :\", flows_files)\n",
|
||||
"\n",
|
||||
"# Lire tous les fichiers dans un dictionnaire\n",
|
||||
"flows_data = {}\n",
|
||||
"for file_path in flows_files:\n",
|
||||
" with fs.open(file_path, 'r') as f:\n",
|
||||
" df = pd.read_csv(f, sep=';',low_memory=False)\n",
|
||||
" flows_data[os.path.basename(file_path)] = df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Liste des fichiers AUM\n",
|
||||
"all_files = fs.ls(carmignac_path)\n",
|
||||
"aum_files = [f for f in all_files if \"AUM\" in f and f.endswith(\".csv\")]\n",
|
||||
"print(\"Fichiers AUM :\", aum_files)\n",
|
||||
"\n",
|
||||
"# Lire tous les fichiers dans un dictionnaire\n",
|
||||
"aum_data = {}\n",
|
||||
"for file_path in aum_files:\n",
|
||||
" with fs.open(file_path, 'r') as f:\n",
|
||||
" df = pd.read_csv(f, sep=';',low_memory=False)\n",
|
||||
" aum_data[os.path.basename(file_path)] = df\n",
|
||||
"\n",
|
||||
"df = aum_data['AUM ENSAE V2 -20251105.csv']\n",
|
||||
"dg = flows_data['Flows ENSAE V2 -20251105.csv']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "0b66aee0-b726-4a57-9461-6a4550a625a8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Agreement - Code</th>\n",
|
||||
" <th>Company - Id</th>\n",
|
||||
" <th>Company - Ultimate Parent Id</th>\n",
|
||||
" <th>Registrar Account - ID</th>\n",
|
||||
" <th>Registrar Account - Region</th>\n",
|
||||
" <th>RegistrarAccount - Country</th>\n",
|
||||
" <th>Product - Asset Type</th>\n",
|
||||
" <th>Product - Strategy</th>\n",
|
||||
" <th>Product - Legal Status</th>\n",
|
||||
" <th>Product - Is Dedie ?</th>\n",
|
||||
" <th>Product - Fund</th>\n",
|
||||
" <th>Product - Shareclass Type</th>\n",
|
||||
" <th>Product - Shareclass Currency</th>\n",
|
||||
" <th>Product - Isin</th>\n",
|
||||
" <th>Centralisation Date</th>\n",
|
||||
" <th>Quantity - AUM</th>\n",
|
||||
" <th>Value - AUM CCY</th>\n",
|
||||
" <th>Value - AUM €</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>200000647</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Diversified</td>\n",
|
||||
" <td>Patrimoine</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>Carmignac Patrimoine</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>EUR</td>\n",
|
||||
" <td>FR0010135103</td>\n",
|
||||
" <td>2015-03-31</td>\n",
|
||||
" <td>35.368</td>\n",
|
||||
" <td>24648.6666</td>\n",
|
||||
" <td>24648.6666</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>200000647</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Diversified</td>\n",
|
||||
" <td>Patrimoine</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>Carmignac Patrimoine</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>EUR</td>\n",
|
||||
" <td>FR0010135103</td>\n",
|
||||
" <td>2015-11-30</td>\n",
|
||||
" <td>35.368</td>\n",
|
||||
" <td>22413.0553</td>\n",
|
||||
" <td>22413.0553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>200000647</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Diversified</td>\n",
|
||||
" <td>Patrimoine</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>Carmignac Patrimoine</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>EUR</td>\n",
|
||||
" <td>FR0010135103</td>\n",
|
||||
" <td>2015-12-31</td>\n",
|
||||
" <td>35.368</td>\n",
|
||||
" <td>22051.2406</td>\n",
|
||||
" <td>22051.2406</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>200000647</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Diversified</td>\n",
|
||||
" <td>Patrimoine</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>Carmignac Patrimoine</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>EUR</td>\n",
|
||||
" <td>FR0010135103</td>\n",
|
||||
" <td>2016-03-31</td>\n",
|
||||
" <td>35.368</td>\n",
|
||||
" <td>21626.1173</td>\n",
|
||||
" <td>21626.1173</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>200000647</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Diversified</td>\n",
|
||||
" <td>Patrimoine</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>Carmignac Patrimoine</td>\n",
|
||||
" <td>A</td>\n",
|
||||
" <td>EUR</td>\n",
|
||||
" <td>FR0010135103</td>\n",
|
||||
" <td>2016-11-30</td>\n",
|
||||
" <td>35.368</td>\n",
|
||||
" <td>22489.4502</td>\n",
|
||||
" <td>22489.4502</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
|
||||
"0 003 166 166 \n",
|
||||
"1 003 166 166 \n",
|
||||
"2 003 166 166 \n",
|
||||
"3 003 166 166 \n",
|
||||
"4 003 166 166 \n",
|
||||
"\n",
|
||||
" Registrar Account - ID Registrar Account - Region \\\n",
|
||||
"0 200000647 France \n",
|
||||
"1 200000647 France \n",
|
||||
"2 200000647 France \n",
|
||||
"3 200000647 France \n",
|
||||
"4 200000647 France \n",
|
||||
"\n",
|
||||
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
|
||||
"0 France Diversified Patrimoine \n",
|
||||
"1 France Diversified Patrimoine \n",
|
||||
"2 France Diversified Patrimoine \n",
|
||||
"3 France Diversified Patrimoine \n",
|
||||
"4 France Diversified Patrimoine \n",
|
||||
"\n",
|
||||
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
|
||||
"0 FCP NO Carmignac Patrimoine \n",
|
||||
"1 FCP NO Carmignac Patrimoine \n",
|
||||
"2 FCP NO Carmignac Patrimoine \n",
|
||||
"3 FCP NO Carmignac Patrimoine \n",
|
||||
"4 FCP NO Carmignac Patrimoine \n",
|
||||
"\n",
|
||||
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
|
||||
"0 A EUR FR0010135103 \n",
|
||||
"1 A EUR FR0010135103 \n",
|
||||
"2 A EUR FR0010135103 \n",
|
||||
"3 A EUR FR0010135103 \n",
|
||||
"4 A EUR FR0010135103 \n",
|
||||
"\n",
|
||||
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
|
||||
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
|
||||
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
|
||||
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
|
||||
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
|
||||
"4 2016-11-30 35.368 22489.4502 22489.4502 "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a5509e6e-ff10-4388-9fee-5cd49d01b60a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Agreement - Code</th>\n",
|
||||
" <th>Company - Id</th>\n",
|
||||
" <th>Company - Ultimate Parent Id</th>\n",
|
||||
" <th>Registrar Account - ID</th>\n",
|
||||
" <th>Registrar Account - Region</th>\n",
|
||||
" <th>RegistrarAccount - Country</th>\n",
|
||||
" <th>Product - Asset Type</th>\n",
|
||||
" <th>Product - Strategy</th>\n",
|
||||
" <th>Product - Legal Status</th>\n",
|
||||
" <th>Product - Is Dedie ?</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>Centralisation Date</th>\n",
|
||||
" <th>Quantity - Subscription</th>\n",
|
||||
" <th>Quantity - Redemption</th>\n",
|
||||
" <th>Quantity - NetFlows</th>\n",
|
||||
" <th>Value Ccy - Subscription</th>\n",
|
||||
" <th>Value Ccy - Redemption</th>\n",
|
||||
" <th>Value Ccy - NetFlows</th>\n",
|
||||
" <th>Value € - Subscription</th>\n",
|
||||
" <th>Value € - Redemption</th>\n",
|
||||
" <th>Value € - NetFlows</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>200127202</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Equity</td>\n",
|
||||
" <td>Investissement</td>\n",
|
||||
" <td>SICAV</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2020-11-05</td>\n",
|
||||
" <td>1636.00</td>\n",
|
||||
" <td>0.000</td>\n",
|
||||
" <td>1636.000</td>\n",
|
||||
" <td>280983.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>280983.00</td>\n",
|
||||
" <td>280983.00</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>280983.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>406533</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Diversified</td>\n",
|
||||
" <td>Patrimoine</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2015-03-09</td>\n",
|
||||
" <td>144.69</td>\n",
|
||||
" <td>0.000</td>\n",
|
||||
" <td>144.690</td>\n",
|
||||
" <td>99985.13</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>99985.13</td>\n",
|
||||
" <td>99985.13</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>99985.13</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>406533</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Equity</td>\n",
|
||||
" <td>Investissement</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2016-10-26</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-8.321</td>\n",
|
||||
" <td>-8.321</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-9384.76</td>\n",
|
||||
" <td>-9384.76</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-9384.76</td>\n",
|
||||
" <td>-9384.76</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>406533</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Equity</td>\n",
|
||||
" <td>Investissement</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2018-10-18</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-22.083</td>\n",
|
||||
" <td>-22.083</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-25227.40</td>\n",
|
||||
" <td>-25227.40</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-25227.40</td>\n",
|
||||
" <td>-25227.40</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>003</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>166</td>\n",
|
||||
" <td>406533</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>France</td>\n",
|
||||
" <td>Equity</td>\n",
|
||||
" <td>Investissement</td>\n",
|
||||
" <td>FCP</td>\n",
|
||||
" <td>NO</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2019-04-08</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-465.992</td>\n",
|
||||
" <td>-465.992</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-563775.76</td>\n",
|
||||
" <td>-563775.76</td>\n",
|
||||
" <td>0.00</td>\n",
|
||||
" <td>-563775.76</td>\n",
|
||||
" <td>-563775.76</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5 rows × 24 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
|
||||
"0 003 166 166 \n",
|
||||
"1 003 166 166 \n",
|
||||
"2 003 166 166 \n",
|
||||
"3 003 166 166 \n",
|
||||
"4 003 166 166 \n",
|
||||
"\n",
|
||||
" Registrar Account - ID Registrar Account - Region \\\n",
|
||||
"0 200127202 France \n",
|
||||
"1 406533 France \n",
|
||||
"2 406533 France \n",
|
||||
"3 406533 France \n",
|
||||
"4 406533 France \n",
|
||||
"\n",
|
||||
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
|
||||
"0 France Equity Investissement \n",
|
||||
"1 France Diversified Patrimoine \n",
|
||||
"2 France Equity Investissement \n",
|
||||
"3 France Equity Investissement \n",
|
||||
"4 France Equity Investissement \n",
|
||||
"\n",
|
||||
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
|
||||
"0 SICAV NO ... 2020-11-05 \n",
|
||||
"1 FCP NO ... 2015-03-09 \n",
|
||||
"2 FCP NO ... 2016-10-26 \n",
|
||||
"3 FCP NO ... 2018-10-18 \n",
|
||||
"4 FCP NO ... 2019-04-08 \n",
|
||||
"\n",
|
||||
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
|
||||
"0 1636.00 0.000 1636.000 \n",
|
||||
"1 144.69 0.000 144.690 \n",
|
||||
"2 0.00 -8.321 -8.321 \n",
|
||||
"3 0.00 -22.083 -22.083 \n",
|
||||
"4 0.00 -465.992 -465.992 \n",
|
||||
"\n",
|
||||
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
|
||||
"0 280983.00 0.00 280983.00 \n",
|
||||
"1 99985.13 0.00 99985.13 \n",
|
||||
"2 0.00 -9384.76 -9384.76 \n",
|
||||
"3 0.00 -25227.40 -25227.40 \n",
|
||||
"4 0.00 -563775.76 -563775.76 \n",
|
||||
"\n",
|
||||
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
|
||||
"0 280983.00 0.00 280983.00 \n",
|
||||
"1 99985.13 0.00 99985.13 \n",
|
||||
"2 0.00 -9384.76 -9384.76 \n",
|
||||
"3 0.00 -25227.40 -25227.40 \n",
|
||||
"4 0.00 -563775.76 -563775.76 \n",
|
||||
"\n",
|
||||
"[5 rows x 24 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dg.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d4c6dc0e-a5bc-495c-a54b-96dd238f46e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filtrer les comptes techniques\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"df['Centralisation Date'] = pd.to_datetime(df['Centralisation Date'])\n",
|
||||
"dg['Centralisation Date'] = pd.to_datetime(dg['Centralisation Date'])\n",
|
||||
"df = df[~df['Registrar Account - ID'].isin(['Off Distribution','Private Clients'])]\n",
|
||||
"dg = dg[~dg['Registrar Account - ID'].isin(['Off Distribution','Private Clients'])]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a19cbae9-636f-4dc7-882c-6857daca8a11",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(4880297, 18)"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "81845380-2c3f-4bf5-81d4-4a89a3732df3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(4836569, 18)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c7abd838-cb2d-41dc-a280-6622b0937672",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Date de référence\n",
|
||||
"\n",
|
||||
"ref_date = pd.Timestamp('2025-10-31')\n",
|
||||
"\n",
|
||||
"df_ref = df[df['Centralisation Date'] == ref_date]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "688f3641-67ff-49a4-b6b0-65b64b110ff3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(24736, 18)"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_ref.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1efb0610-4eaf-427c-a15c-d926d423db76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
3110
Challenge_cleaning.ipynb
Normal file
3110
Challenge_cleaning.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -896,7 +896,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.11"
|
||||
"version": "3.13.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
|||
|
|
@ -1450,7 +1450,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.11"
|
||||
"version": "3.13.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
|||
|
|
@ -1228,7 +1228,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.11"
|
||||
"version": "3.13.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user