import os

import pandas as pd
import s3fs

# S3-compatible filesystem handle pointing at the MinIO endpoint.
# Credentials are read from the environment when this module is imported;
# a missing variable raises KeyError at import time.
fs = s3fs.S3FileSystem(
    client_kwargs={'endpoint_url': 'https://minio-simple.lab.groupe-genes.fr'},
    key=os.environ["AWS_ACCESS_KEY_ID"],
    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
    token=os.environ["AWS_SESSION_TOKEN"],
)


def load_and_clean_data(rates_path, gov_path):
    """Load the raw CSVs and parse their date columns.

    The flows and AUM tables are read from fixed objects in the
    ``projet-bdc-carmignac-g3`` bucket through the module-level ``fs``
    handle; the rates and government tables are read from the paths given
    by the caller.

    Args:
        rates_path: Path (or any source accepted by ``pd.read_csv``) of the
            rates CSV. It must contain a ``Date`` column.
        gov_path: Path (or any source accepted by ``pd.read_csv``) of the
            government CSV. It must contain a ``Date`` column.

    Returns:
        A tuple ``(flows, aum, rates, gov)`` of DataFrames. The
        ``Centralisation Date`` column of ``flows`` and ``aum`` and the
        ``Date`` column of ``rates`` and ``gov`` are converted with
        ``pd.to_datetime``.

    Raises:
        KeyError: If an expected date column is missing from a table.
        ValueError: If a date column cannot be parsed.
    """
    with fs.open('s3://projet-bdc-carmignac-g3/AUM_repaired.csv', 'rb') as f:
        aum = pd.read_csv(f, sep=",")
    with fs.open('s3://projet-bdc-carmignac-g3/flows.csv', 'rb') as f:
        flows = pd.read_csv(f, sep=",")

    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])
    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])

    rates = pd.read_csv(rates_path)
    try:
        # Try a day-first interpretation of the rates dates first.
        rates['Date'] = pd.to_datetime(rates['Date'], dayfirst=True)
    except (ValueError, TypeError):
        # Only parsing failures trigger the fallback to pandas' default
        # parsing; KeyboardInterrupt/SystemExit are no longer swallowed.
        rates['Date'] = pd.to_datetime(rates['Date'])

    gov = pd.read_csv(gov_path)
    gov['Date'] = pd.to_datetime(gov['Date'])

    return flows, aum, rates, gov