Project_Carmignac/clustering/features.py

108 lines
4.8 KiB
Python
Raw Normal View History

2026-02-02 11:37:16 +01:00
import pandas as pd
import statsmodels.api as sm
2026-02-02 19:00:03 +01:00
import numpy as np
2026-02-02 11:37:16 +01:00
def compute_static_features(flows_df: pd.DataFrame, aum_df: pd.DataFrame) -> pd.DataFrame:
    """Build one row of descriptive (non time-series) features per account.

    Args:
        flows_df: Transaction-level flows. Columns read:
            'Registrar Account - ID', 'Value € - Subscription',
            'Value € - Redemption', 'Value € - NetFlows', 'Agreement - Code',
            'Centralisation Date' (must support ``max() - min()`` yielding an
            object with ``.days``) and 'Product - Asset Type'.
        aum_df: AUM observations. Columns read: 'Registrar Account - ID' and
            'Value - AUM €'.

    Returns:
        DataFrame indexed by 'Registrar Account - ID' with columns
        ``total_subs``, ``total_reds``, ``net_flow_vol``, ``txn_count``,
        ``tenure_days``, ``buy_sell_ratio``, one ``pct_flow_<asset type>``
        column per asset type, ``avg_aum`` and ``aum_volatility``.
        Accounts present in only one of the two inputs are kept (outer join)
        and every missing value is replaced by 0.
    """
    account_col = 'Registrar Account - ID'

    # --- 1. Flow Dynamics ---
    flow_stats = flows_df.groupby(account_col).agg(
        total_subs=('Value € - Subscription', 'sum'),
        total_reds=('Value € - Redemption', 'sum'),
        net_flow_vol=('Value € - NetFlows', 'sum'),
        # 'count' only counts rows whose 'Agreement - Code' is non-null.
        txn_count=('Agreement - Code', 'count'),
        # Tenure: days between first and last activity.
        tenure_days=(
            'Centralisation Date',
            lambda dates: (dates.max() - dates.min()).days,
        ),
    )

    # Flow ratio: -1 (pure seller) to +1 (pure buyer). The small epsilon keeps
    # the division defined for accounts whose gross flow is zero.
    # NOTE(review): this range only holds if redemptions are stored as
    # positive amounts -- confirm against the data source.
    gross_flow = flow_stats['total_subs'] + flow_stats['total_reds']
    net_flow = flow_stats['total_subs'] - flow_stats['total_reds']
    flow_stats['buy_sell_ratio'] = net_flow / (gross_flow + 1e-6)

    # --- 2. Product Preferences ---
    # Share of each account's subscriptions going to each asset type.
    asset_pivot = (
        flows_df
        .groupby([account_col, 'Product - Asset Type'])['Value € - Subscription']
        .sum()
        .unstack(fill_value=0)
    )
    row_totals = asset_pivot.sum(axis=1) + 1e-6
    asset_pct = asset_pivot.div(row_totals, axis=0).add_prefix('pct_flow_')

    # --- 3. AUM Stats ---
    aum_stats = aum_df.groupby(account_col).agg(
        avg_aum=('Value - AUM €', 'mean'),
        # Sample std is NaN for a single observation; it becomes 0 below.
        aum_volatility=('Value - AUM €', 'std'),
    )

    # Merge all static features. The outer join keeps accounts that only
    # appear in the AUM data; their flow features are filled with 0.
    features = flow_stats.join(asset_pct).join(aum_stats, how='outer').fillna(0)
    return features
2026-02-02 19:00:03 +01:00
def _build_market_factors(rates_df, gov_df, freq):
    """Return per-period ``Delta_Rate`` and ``Bond_Return`` keyed by ``pd.Period``."""
    # Rates: last observation of each period, then period-over-period change.
    # Sorting first makes "last" mean chronologically last.
    rates = rates_df.set_index('Date')['Yld to Maturity'].sort_index(kind='mergesort')
    rates_res = rates.groupby(rates.index.to_period(freq)).last()
    if len(rates_res) > 0:
        # Re-insert periods without any observation so that diff() never
        # spans a gap; those rows become NaN and are dropped further down.
        full_range = pd.period_range(rates_res.index.min(), rates_res.index.max())
        rates_res = rates_res.reindex(full_range)
    delta_rates = rates_res.diff().rename('Delta_Rate')

    # Gov bonds: 'EG04' is used as the proxy index (7-10Y Euro Gov according
    # to the original comment). Duplicate dates keep their first row.
    gov_target = gov_df[gov_df['Bond/Index'] == 'EG04'].set_index('Date')
    gov_target = gov_target[~gov_target.index.duplicated(keep='first')]
    # Compound the 1-week percentage returns inside each period.
    growth = 1 + gov_target['Total Return % 1-wk-LOC'] / 100
    bond_return = growth.groupby(growth.index.to_period(freq)).prod() - 1
    bond_return = bond_return.rename('Bond_Return')

    # Keep only periods where both factors are available.
    return pd.concat([delta_rates, bond_return], axis=1).dropna()


def compute_market_sensitivities(
    flows_df: pd.DataFrame,
    aum_df: pd.DataFrame,
    rates_df: pd.DataFrame,
    gov_df: pd.DataFrame,
    freq='M',
    min_mean_aum=1000,
    min_txns=6,
    min_periods=6,
) -> pd.DataFrame:
    """Estimate each client's flow sensitivity (beta) to rates and gov bonds.

    For every eligible client an OLS regression is fitted per period:

        NetFlow_t / mean_AUM = alpha + beta_rate * Delta_Rate_t
                                     + beta_bond * Bond_Return_t

    Client flows and market factors are both keyed by ``pd.Period`` so that
    they line up regardless of whether a timestamp labels the start or the
    end of a period.

    Args:
        flows_df: Transaction-level flows. Columns read:
            'Registrar Account - ID', 'Centralisation Date' (datetime) and
            'Quantity - NetFlows'. The frame is not modified.
        aum_df: AUM observations ('Registrar Account - ID', 'Value - AUM €').
        rates_df: Rates ('Date' as datetime, 'Yld to Maturity').
        gov_df: Bond index data ('Bond/Index', 'Date' as datetime,
            'Total Return % 1-wk-LOC'); only rows with 'EG04' are used.
        freq: pandas period alias used for bucketing (e.g. 'M', 'W', 'Q').
        min_mean_aum: A client is eligible only if its mean AUM is strictly
            greater than this value (also guards the division below).
        min_txns: Minimum number of flow rows for a client to be eligible.
        min_periods: Minimum number of periods shared by the client's flows
            and the market factors for a regression to be attempted.

    Returns:
        DataFrame indexed by 'Registrar Account - ID' with columns ``alpha``,
        ``beta_rate``, ``beta_bond`` and ``r_squared``. It is empty (same
        index name and columns) when no regression could be fitted.

    Side effects:
        Prints the eligibility funnel summary. A failed fit for a client
        emits a ``RuntimeWarning`` and that client is skipped.
    """
    import warnings

    id_col = 'Registrar Account - ID'
    result_cols = ['alpha', 'beta_rate', 'beta_bond', 'r_squared']
    empty_result = pd.DataFrame(columns=[id_col, *result_cols]).set_index(id_col)

    # --- 1. Prepare Market Factors ---
    market_factors = _build_market_factors(rates_df, gov_df, freq)

    # --- 2. Identify Eligible Clients (Funneling) ---
    # Criterion A: wealthy enough (also avoids dividing by a ~zero AUM).
    mean_aum = aum_df.groupby(id_col)['Value - AUM €'].mean()
    valid_aum_clients = mean_aum[mean_aum > min_mean_aum].index

    # Criterion B: active enough.
    txn_counts = flows_df[id_col].value_counts()
    active_clients = txn_counts[txn_counts >= min_txns].index

    # Intersection: clients who are BOTH wealthy enough and active enough.
    eligible_clients = set(valid_aum_clients) & set(active_clients)

    print(f"Regression Funnel: {len(eligible_clients)} clients eligible out of {len(mean_aum)} total accounts.")

    if not eligible_clients:
        return empty_result

    # --- 3. Run Regression on Eligible Clients ---
    # Periods are kept in a local Series; the caller's frame is left untouched.
    periods = flows_df['Centralisation Date'].dt.to_period(freq)
    is_eligible = flows_df[id_col].isin(eligible_clients)

    # NOTE(review): 'Quantity - NetFlows' (units) is divided by an AUM in EUR
    # below; 'Value € - NetFlows' may have been intended -- confirm.
    net_flows = (
        flows_df.loc[is_eligible, 'Quantity - NetFlows']
        .groupby([flows_df.loc[is_eligible, id_col], periods[is_eligible]])
        .sum()
    )

    client_betas = []
    for client, client_series in net_flows.groupby(level=0):
        c_ts = client_series.droplevel(0)

        # Keep only periods present in both the client flows and the factors.
        merged = pd.concat([c_ts, market_factors], axis=1, join='inner')

        # Ensure we have enough data points for a valid regression.
        if len(merged) < min_periods:
            continue
        merged = merged.reset_index(drop=True)

        # Normalise by the client's average AUM (approximation of AUM_{t-1}).
        Y = merged['Quantity - NetFlows'] / mean_aum.loc[client]
        X = sm.add_constant(merged[['Delta_Rate', 'Bond_Return']])

        try:
            model = sm.OLS(Y, X).fit()
        except Exception as exc:  # best effort: one bad client must not abort the run
            warnings.warn(
                f"OLS fit failed for client {client!r}: {exc}",
                RuntimeWarning,
                stacklevel=2,
            )
            continue

        client_betas.append({
            id_col: client,
            # 'const' is absent when add_constant skipped it, hence the default.
            'alpha': model.params.get('const', 0),  # Intercept (autonomous trend)
            'beta_rate': model.params.get('Delta_Rate', 0),
            'beta_bond': model.params.get('Bond_Return', 0),
            'r_squared': model.rsquared,
        })

    if not client_betas:
        return empty_result

    return pd.DataFrame(client_betas).set_index(id_col)