2026-02-02 11:37:16 +01:00
|
|
|
import pandas as pd
|
2026-02-02 19:00:03 +01:00
|
|
|
import os
|
|
|
|
|
import s3fs
|
|
|
|
|
# Module-level S3 filesystem handle used by the loaders in this file.
# The three credentials are read from the environment when this statement
# runs, so a missing variable raises KeyError at that point.
fs = s3fs.S3FileSystem(
    client_kwargs=dict(
        endpoint_url='https://' + 'minio-simple.lab.groupe-genes.fr',
    ),
    key=os.environ["AWS_ACCESS_KEY_ID"],
    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
    token=os.environ["AWS_SESSION_TOKEN"],
)
|
2026-02-02 11:37:16 +01:00
|
|
|
|
2026-02-02 19:00:03 +01:00
|
|
|
def _read_s3_csv(path, dtype):
    """Read a comma-separated file through the module-level ``fs`` handle."""
    with fs.open(path, 'rb') as f:
        return pd.read_csv(f, sep=",", dtype=dtype)


def _parse_date_column(frame, column='Date'):
    """Convert ``frame[column]`` to datetimes in place, trying day-first first."""
    try:
        frame[column] = pd.to_datetime(frame[column], dayfirst=True)
    except (ValueError, TypeError, OverflowError):
        # Day-first parsing rejected the values: retry with pandas' default
        # inference. Only parsing errors are caught, so KeyboardInterrupt,
        # SystemExit and a missing column are no longer swallowed/retried.
        frame[column] = pd.to_datetime(frame[column])
    return frame


def load_and_clean_data(
    rates_path,
    gov_path,
    *,
    aum_path='s3://projet-bdc-carmignac-g3/AUM_repaired.csv',
    flows_path='s3://projet-bdc-carmignac-g3/flows.csv',
):
    """Load the AUM, flows, rates and government data sets with parsed dates.

    The AUM and flows tables are read through the module-level ``fs``
    filesystem; the two market-data files are read directly with
    ``pd.read_csv``.

    Args:
        rates_path: Path (or anything ``pd.read_csv`` accepts) of the rates
            CSV. Its ``'Date'`` column is converted to datetimes.
        gov_path: Path (or anything ``pd.read_csv`` accepts) of the
            government data CSV. Its ``'Date'`` column is converted to
            datetimes.
        aum_path: Location of the AUM CSV opened via ``fs``. Defaults to the
            path that was previously hard-coded.
        flows_path: Location of the flows CSV opened via ``fs``. Defaults to
            the path that was previously hard-coded.

    Returns:
        A tuple ``(flows, aum, rates, gov)`` of DataFrames.

    Raises:
        KeyError: If a date column accessed below (``'Centralisation Date'``
            or ``'Date'``) is absent from the corresponding table.
    """
    # Enforce string types for IDs to prevent 'Mixed Type' warnings
    dtype_spec = {
        'Registrar Account - ID': str,
        'Company - Id': str,
        'Company - Ultimate Parent Id': str,
        'Agreement - Code': str,
    }

    aum = _read_s3_csv(aum_path, dtype_spec)
    flows = _read_s3_csv(flows_path, dtype_spec)

    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])
    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])

    # Market data loading (Standardizing dates)
    print("Loading Market Data...")
    rates = _parse_date_column(pd.read_csv(rates_path))
    gov = _parse_date_column(pd.read_csv(gov_path))

    return flows, aum, rates, gov
|