import os

import pandas as pd
import s3fs

# S3-compatible filesystem handle used by the loaders below. Credentials are
# read from the environment at import time; a missing variable raises KeyError.
fs = s3fs.S3FileSystem(
    client_kwargs={'endpoint_url': 'https://' + 'minio-simple.lab.groupe-genes.fr'},
    key=os.environ["AWS_ACCESS_KEY_ID"],
    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
    token=os.environ["AWS_SESSION_TOKEN"],
)


def _read_s3_csv(uri, dtype):
    """Read a comma-separated CSV from *uri* through ``fs`` into a DataFrame."""
    with fs.open(uri, 'rb') as f:
        return pd.read_csv(f, sep=",", dtype=dtype)


def _parse_dates_dayfirst(frame, column='Date'):
    """Convert ``frame[column]`` to datetime in place, trying day-first first.

    If day-first parsing fails, the column is parsed again with pandas'
    default settings. Only parse failures (``ValueError`` / ``TypeError``)
    trigger the fallback; anything else (e.g. a missing column, Ctrl-C)
    propagates unchanged.
    """
    try:
        frame[column] = pd.to_datetime(frame[column], dayfirst=True)
    except (ValueError, TypeError):
        frame[column] = pd.to_datetime(frame[column])
    return frame


def load_and_clean_data(rates_path, gov_path):
    """Load the flows, AUM and market-data tables and parse their date columns.

    Flows and AUM are read from fixed locations in the
    ``projet-bdc-carmignac-g3`` bucket via the module-level ``fs``; the two
    market-data files are read with ``pd.read_csv`` from the given paths.

    Args:
        rates_path: Location of the rates CSV, passed to ``pd.read_csv``.
            The file must contain a ``Date`` column.
        gov_path: Location of the government-data CSV, passed to
            ``pd.read_csv``. The file must contain a ``Date`` column.

    Returns:
        A tuple ``(flows, aum, rates, gov)`` of DataFrames. ``flows`` and
        ``aum`` have ``'Centralisation Date'`` converted to datetime;
        ``rates`` and ``gov`` have ``'Date'`` converted to datetime.

    Raises:
        KeyError: If an expected date column is missing from a table.
        ValueError: If a date column cannot be parsed even by the fallback.
    """
    # Enforce string types for IDs to prevent 'Mixed Type' warnings
    dtype_spec = {
        'Registrar Account - ID': str,
        'Company - Id': str,
        'Company - Ultimate Parent Id': str,
        'Agreement - Code': str,
    }

    aum = _read_s3_csv('s3://projet-bdc-carmignac-g3/AUM_repaired.csv', dtype_spec)
    flows = _read_s3_csv('s3://projet-bdc-carmignac-g3/flows.csv', dtype_spec)

    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])
    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])

    # Market data loading (Standardizing dates)
    print("Loading Market Data...")
    rates = _parse_dates_dayfirst(pd.read_csv(rates_path))
    gov = _parse_dates_dayfirst(pd.read_csv(gov_path))

    return flows, aum, rates, gov