87 lines
2.7 KiB
Python
87 lines
2.7 KiB
Python
import pandas as pd
|
||
|
||
def detect_ruptures(df: pd.DataFrame, epsilon: float = 0.05) -> pd.DataFrame:
    """Detect AUM transfers ("ruptures") between registrar accounts.

    A rupture is reported when, on the same date and for the same agreement,
    company, ultimate parent, region and country, one account's AUM drops to
    zero while another account's AUM rises from zero by a comparable amount.

    Args:
        df: Input frame. It must contain the columns 'Agreement - Code',
            'Company - Id', 'Company - Ultimate Parent Id',
            'Registrar Account - Region', 'RegistrarAccount - Country',
            'Registrar Account - ID', 'Centralisation Date' and
            'Value - AUM €'. The frame is not modified.
        epsilon: Maximum relative difference allowed between the amount lost
            by the old account and the amount gained by the new account. The
            difference is measured against the mean of the two magnitudes.

    Returns:
        A DataFrame with the columns 'date', 'old_account', 'new_account' and
        'value', where 'value' is the amount gained by the new account.

    Raises:
        KeyError: If one or more required columns are missing from ``df``.
    """
    date_col = 'Centralisation Date'
    value_col = 'Value - AUM €'
    id_col = 'Registrar Account - ID'

    # Attributes that the old and the new account of a rupture must share.
    shared_cols = [
        'Agreement - Code',
        'Company - Id',
        'Company - Ultimate Parent Id',
        'Registrar Account - Region',
        'RegistrarAccount - Country',
    ]
    # Key columns that identify one account.
    key_cols = shared_cols + [id_col]
    # Columns used to pair a vanished account with an appearing one (no ID).
    merge_cols = [date_col] + shared_cols
    agg_cols = key_cols + [date_col]

    missing = [col for col in agg_cols + [value_col] if col not in df.columns]
    if missing:
        raise KeyError(
            f"detect_ruptures: missing required column(s): {missing}"
        )

    # Work on a copy restricted to the columns that are actually used.
    df_temp = df[agg_cols + [value_col]].copy()
    df_temp[date_col] = pd.to_datetime(df_temp[date_col])
    # Rows without a date cannot be placed on the timeline; ignore them.
    df_temp = df_temp.dropna(subset=[date_col])

    # Distinct dates, in chronological order.
    full_dates = (
        df_temp[date_col]
        .drop_duplicates()
        .sort_values()
        .reset_index(drop=True)
    )

    # Every account x date combination, so that gaps become explicit rows.
    accounts = df_temp[key_cols].drop_duplicates()
    full_index = accounts.merge(full_dates.to_frame(), how='cross')

    # Aggregate AUM per account and date. dropna=False keeps accounts that
    # have a missing key attribute; the default would silently discard them.
    df_agg = (
        df_temp.groupby(agg_cols, dropna=False)[value_col]
        .sum()
        .reset_index()
    )

    # Project the aggregated values onto the full grid; gaps count as 0.
    df_full = full_index.merge(df_agg, on=agg_cols, how='left')
    df_full[value_col] = df_full[value_col].fillna(0)
    df_full = df_full.sort_values(agg_cols)

    # Per-account variation and previous value (0 on an account's first date).
    per_account = df_full.groupby(key_cols, dropna=False)[value_col]
    df_full['AUM_diff'] = per_account.diff().fillna(0)
    df_full['prev_value'] = per_account.shift(1).fillna(0)

    # Accounts that lose everything on a given date.
    lost_all = df_full[(df_full['AUM_diff'] < 0) & (df_full[value_col] == 0)]
    # Accounts that start from zero on a given date.
    from_zero = df_full[
        (df_full['AUM_diff'] > 0) & (df_full['prev_value'] == 0)
    ]

    # Pair each vanished account with each appearing account that shares the
    # same date and attributes.
    ruptures = lost_all.merge(
        from_zero, on=merge_cols, suffixes=('_old', '_new')
    )

    # Relative gap between the outflow (negative) and the inflow (positive),
    # measured against the mean of their magnitudes.
    outflow = ruptures['AUM_diff_old']
    inflow = ruptures['AUM_diff_new']
    diff_rel = (outflow + inflow).abs() / ((outflow.abs() + inflow.abs()) / 2)
    ruptures = ruptures[diff_rel <= epsilon]

    # Final columns.
    return ruptures[
        [date_col, id_col + '_old', id_col + '_new', 'AUM_diff_new']
    ].rename(
        columns={
            date_col: 'date',
            id_col + '_old': 'old_account',
            id_col + '_new': 'new_account',
            'AUM_diff_new': 'value',
        }
    )
|