2026-02-02 11:37:16 +01:00
|
|
|
import pandas as pd
|
2026-02-02 19:00:03 +01:00
|
|
|
import os
|
|
|
|
|
import s3fs
|
|
|
|
|
# Shared S3 filesystem handle used by the loaders in this module.
# It targets the MinIO endpoint below and takes its credentials from the
# AWS_* environment variables; all three must be set, otherwise the
# os.environ lookups raise KeyError at import time.
fs = s3fs.S3FileSystem(
    client_kwargs={
        'endpoint_url': 'https://' + 'minio-simple.lab.groupe-genes.fr',
    },
    key=os.environ["AWS_ACCESS_KEY_ID"],
    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
    token=os.environ["AWS_SESSION_TOKEN"],
)
|
2026-02-02 11:37:16 +01:00
|
|
|
|
2026-02-02 19:00:03 +01:00
|
|
|
def load_and_clean_data(rates_path, gov_path):
    """
    Load the flows, AUM, rates and gov tables and parse their date columns.

    The flows and AUM tables are read from fixed objects in the
    ``projet-bdc-carmignac-g3`` S3 bucket through the module-level ``fs``
    filesystem. The rates and gov tables are read from the locations
    supplied by the caller.

    Args:
        rates_path: Location of the rates CSV, passed unchanged to
            ``pd.read_csv``. The file must contain a ``Date`` column.
        gov_path: Location of the gov CSV, passed unchanged to
            ``pd.read_csv``. The file must contain a ``Date`` column.

    Returns:
        A ``(flows, aum, rates, gov)`` tuple of DataFrames.
        ``flows['Centralisation Date']``, ``aum['Centralisation Date']``,
        ``rates['Date']`` and ``gov['Date']`` are converted with
        ``pd.to_datetime``.

    Raises:
        KeyError: If an expected date column is missing from a table.
        ValueError: If a date column cannot be parsed (for ``rates`` only
            after both the day-first and the default parse have failed).
    """
    with fs.open('s3://projet-bdc-carmignac-g3/AUM_repaired.csv', 'rb') as f:
        aum = pd.read_csv(f, sep=",")

    with fs.open('s3://projet-bdc-carmignac-g3/flows.csv', 'rb') as f:
        flows = pd.read_csv(f, sep=",")

    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])
    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])

    rates = pd.read_csv(rates_path)
    try:
        # First attempt: interpret ambiguous dates as day-first.
        rates['Date'] = pd.to_datetime(rates['Date'], dayfirst=True)
    except (ValueError, TypeError):
        # Only parse failures trigger the fallback to pandas' default
        # parsing. Anything else (e.g. KeyboardInterrupt, a missing column)
        # propagates instead of being silently swallowed.
        rates['Date'] = pd.to_datetime(rates['Date'])

    gov = pd.read_csv(gov_path)
    gov['Date'] = pd.to_datetime(gov['Date'])

    return flows, aum, rates, gov
|