import pandas as pd


def detect_ruptures(df, epsilon=0.05):
    """Detect AUM transfers ("ruptures") between registrar accounts.

    A rupture is reported when, on the same centralisation date and within
    the same agreement / company / ultimate parent / region / country, one
    account drops to zero AUM while another account rises from zero AUM by
    (almost) the same amount.

    Args:
        df: Input DataFrame. It must contain the six account key columns
            listed in ``key_cols`` below, plus ``'Centralisation Date'``
            (anything ``pd.to_datetime`` accepts) and ``'Value - AUM €'``.
            The frame is not modified.
        epsilon: Maximum relative difference tolerated between the amount
            lost by the old account and the amount gained by the new one.
            The relative difference is ``|loss + gain|`` divided by the mean
            of ``|loss|`` and ``|gain|``.

    Returns:
        A DataFrame with columns ``date``, ``old_account``, ``new_account``
        and ``value`` (the AUM increase of the new account), one row per
        detected rupture.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    id_col = 'Registrar Account - ID'
    date_col = 'Centralisation Date'
    value_col = 'Value - AUM €'
    # Internal surrogate key; dropped before the final pairing step.
    account_key = '__account_key__'

    # Key columns identifying an account.
    key_cols = [
        'Agreement - Code',
        'Company - Id',
        'Company - Ultimate Parent Id',
        'Registrar Account - Region',
        'RegistrarAccount - Country',
        id_col,
    ]

    required = key_cols + [date_col, value_col]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Work on a copy restricted to the columns that are actually used.
    data = df[required].copy()
    data[date_col] = pd.to_datetime(data[date_col])

    # Distinct dates, ascending. Missing dates (NaT) cannot be placed on the
    # timeline, so they are left out of the grid.
    dates = (
        data[date_col]
        .dropna()
        .drop_duplicates()
        .sort_values()
        .reset_index(drop=True)
    )

    # One row per distinct account, tagged with a surrogate integer key.
    # Grouping on that key (rather than on the raw key columns) keeps
    # accounts whose identifiers contain NaN: groupby drops NaN keys by
    # default, which would silently zero out their AUM.
    accounts = data[key_cols].drop_duplicates().reset_index(drop=True)
    accounts[account_key] = accounts.index

    # Attach the surrogate key to every input row (merge matches NaN keys
    # with each other), then sum AUM per account and date.
    keyed = data.merge(accounts, on=key_cols, how='left')
    aggregated = (
        keyed.groupby([account_key, date_col])[value_col]
        .sum()
        .reset_index()
    )

    # Full accounts x dates grid; combinations without data get an AUM of 0.
    grid = accounts.merge(dates.to_frame(name=date_col), how='cross')
    full = grid.merge(aggregated, on=[account_key, date_col], how='left')
    full[value_col] = full[value_col].fillna(0)
    full = full.sort_values(key_cols + [date_col])

    # Per-account variation and previous value; the first date of each
    # account has no predecessor and gets 0 for both.
    per_account = full.groupby(account_key)[value_col]
    full['AUM_diff'] = per_account.diff().fillna(0)
    full['prev_value'] = per_account.shift(1).fillna(0)
    full = full.drop(columns=[account_key])

    # Accounts that lose everything / accounts that start from zero.
    emptied = full[(full['AUM_diff'] < 0) & (full[value_col] == 0)]
    opened = full[(full['AUM_diff'] > 0) & (full['prev_value'] == 0)]

    # Pair them on the date and on every key column except the account ID.
    merge_cols = [date_col] + [col for col in key_cols if col != id_col]
    ruptures = emptied.merge(opened, on=merge_cols, suffixes=('_old', '_new'))

    # Relative mismatch between the amount lost and the amount gained. The
    # denominator is strictly positive because both variations are non-zero.
    loss = ruptures['AUM_diff_old']
    gain = ruptures['AUM_diff_new']
    diff_rel = (loss + gain).abs() / ((loss.abs() + gain.abs()) / 2)
    ruptures = ruptures[diff_rel <= epsilon]

    # Final columns.
    return ruptures[
        [date_col, id_col + '_old', id_col + '_new', 'AUM_diff_new']
    ].rename(
        columns={
            date_col: 'date',
            id_col + '_old': 'old_account',
            id_col + '_new': 'new_account',
            'AUM_diff_new': 'value',
        }
    )