Project_Carmignac/clustering/data_loader.py

27 lines
882 B
Python
Raw Normal View History

2026-02-02 11:37:16 +01:00
import pandas as pd
def load_and_clean_data(flows_path, aum_path, rates_path, gov_path):
    """
    Load the four raw CSVs and parse their date columns to datetimes.

    No cleaning beyond date parsing is performed here; every other column
    is returned exactly as ``pd.read_csv`` produced it.

    Args:
        flows_path: CSV path (or file-like object) for the flows data;
            must contain a 'Centralisation Date' column.
        aum_path: CSV path (or file-like object) for the AUM data;
            must contain a 'Centralisation Date' column.
        rates_path: CSV path (or file-like object) for the rates data;
            must contain a 'Date' column, parsed day-first when possible.
        gov_path: CSV path (or file-like object) for the government
            indices; must contain a 'Date' column.

    Returns:
        A tuple ``(flows, aum, rates, gov)`` of DataFrames whose date
        columns have been converted with ``pd.to_datetime``.

    Raises:
        KeyError: If an expected date column is missing from a file.
        ValueError: If a date column cannot be parsed at all.
    """
    # 1. Load Flows
    flows = pd.read_csv(flows_path)
    flows['Centralisation Date'] = pd.to_datetime(flows['Centralisation Date'])

    # 2. Load AUM
    aum = pd.read_csv(aum_path)
    aum['Centralisation Date'] = pd.to_datetime(aum['Centralisation Date'])

    # 3. Load Market Data (STR Rates)
    # Handling potential dd/mm/yyyy formats common in EU data
    rates = pd.read_csv(rates_path)
    try:
        rates['Date'] = pd.to_datetime(rates['Date'], dayfirst=True)
    except (ValueError, TypeError):
        # Only parsing failures trigger the fallback to pandas' default
        # inference. A bare ``except`` would also swallow KeyboardInterrupt
        # and SystemExit, so the handler is deliberately narrow.
        rates['Date'] = pd.to_datetime(rates['Date'])

    # 4. Load Gov Indices
    gov = pd.read_csv(gov_path)
    gov['Date'] = pd.to_datetime(gov['Date'])

    return flows, aum, rates, gov