Supprimer identifyMixture.R

2024-08-28 15:23:17 +02:00 · 2024-08-28 15:23:17 +02:00 · 311526b597
commit 311526b597
parent 3b9f7af4da
1 changed files with 0 additions and 136 deletions
--- a/identifyMixture.R
+++ b/identifyMixture.R
@ -1,136 +0,0 @@
-#' Solve label switching and identify mixture.
-#'
-#' @description Clustering of the draws in the point process representation (PPR) using
-#'   \eqn{k}-means clustering.
-#' @param Func A numeric array of dimension \eqn{M \times d \times K}; data for clustering in the PPR.
-#' @param Mu A numeric array of dimension \eqn{M \times r \times K}; draws of cluster means.
-#' @param Eta A numeric array of dimension \eqn{M \times K}; draws of cluster sizes.
-#' @param S A numeric matrix of dimension \eqn{M \times N}; draws of cluster assignments. 
-#' @param Sigma2 A numeric array of dimension \eqn{M \times r \times K}; draws of cluster variances. 
-#' @param centers An integer or a numeric matrix of dimension \eqn{K \times d}; used to initialize [stats::kmeans()].
-#' @return A named list containing:
-#'  * `"S"`: reordered assignments.
-#'  * `"Mu"`: reordered Mu matrix.
-#'  * `"Eta"`: reordered weights.
-#'  * `"Sigma2"`: reordered Sigma2 matrix.
-#'  * `"non_perm_rate"`: proportion of draws where the clustering did not
-#'    result in a permutation and hence no relabeling could be
-#'    performed; this is the proportion of draws discarded.
-#' 
-#' @details The following steps are implemented:
-#' * A functional of the draws of the component-specific
-#'   parameters (`Func`) is passed to the function.  The functionals
-#'   of each component and iteration are stacked on top of each other in
-#'   order to obtain a matrix where each row corresponds to the
-#'   functional of one component.
-#' * The functionals are clustered into \eqn{K_+} clusters using \eqn{k}-means
-#'   clustering. For each functional a group label is obtained.
-#' * The obtained labels of the functionals are used to construct
-#'   a classification for each MCMC iteration.  Those classifications
-#'   which are a permutation of \eqn{(1,\ldots,K_+)} are used to reorder 
-#'   the Mu and Eta draws and the assignment matrix S. This results in an
-#'   identified mixture model.
-#'  * Note that only iterations resulting in permutations
-#'   are used for parameter estimation and deriving the final
-#'   partition. Those MCMC iterations where the obtained
-#'   classifications of the functionals are not a permutation of
-#'   \eqn{(1,\ldots,K_+)} are discarded as no unique assignment of functionals
-#'   to components can be made.  If the non-permutation rate, i.e. the
-#'   proportion of MCMC iterations where the obtained classifications
-#'   of the functionals are not a permutation, is high, this is an
-#'   indication of a poor clustering solution, as the
-#'   functionals are not clearly separated.
-#'
-identifyMixture <- function(Func, Mu, Eta, S, Sigma2, centers) {
-
-  # ## To be canceled:
-  # Func <- Mu_Kplus
-  # Mu <- Mu_Kplus
-  # Eta <- Eta_Kplus
-  # S <- S_Kplus
-  # Sigma2 <- Sigma2_Kplus
-  # centers <- Func_init
-  # ##
-  
-    K <- length(Func[1, 1, ])
-    M <- length(Func[, 1, 1])
-    d <- length(Func[1, , 1])
-    r <- length(Mu[1, , 1])
-    N <- length(S[1, ])
-    
-    ##---------------------- step 1
-    ## from the functional draws, a matrix KM of size (M*K)xr is
-    ## created, by putting the draws of the different clusters below
-    ## each other
-
-    KM <- do.call("rbind", lapply(1:K, function(k) matrix(Func[, , k, drop = TRUE], ncol = d)))
-                              # 'Func[, , k, drop = TRUE]' extracts the k-th matrix or slice from the 3D 
-                              # array Func. The 'drop = TRUE' parameter is used to drop any dimensions with 
-                              # a single level, ensuring that the result is a matrix.
-                              # 'do.call("rbind", ...)' takes the list of matrices generated by 'lapply' 
-                              # and combines them row-wise into a single matrix. The 'rbind' function is 
-                              # used to bind these matrices together by rows.
-    colnames(KM) <- paste0("func", 1:d)
-    
-    #
-#    KSigma2 <- do.call("rbind", lapply(1:K, function(k) matrix(Sigma2[, , k, drop = TRUE], ncol = d)))
-    
-#    colnames(KSigma2) <- paste0("func_sigma2", 1:d)
-
-    ##---------------------- step 2
-    ## applying k-means clustering with known clusters
-    ## centers(=mu_func) and obtaining the classification 'class'
-
-    cl_y <- kmeans(KM, centers = centers, nstart = 30)
-    class <- cl_y$cluster
-    
-    ##---------------------- step 3
-    ## classification matrix Rho_m is constructed: each row
-    ## corresponds to one MCMC iteration and contains the labels of
-    ## the group where the corresponding draw was assigned to by
-    ## kmeans.
-    Rho_m <- NULL
-    for (l in 0:(K - 1)) {
-        Rho_m <- cbind(Rho_m, class[(l * M + 1):((l + 1) * M)])
-    }
-
-    ##---------------------- step 4
-    ## Identifying non-permutations, i.e. those rows where the
-    ## sequence of group labels is not a permutation of 1:K
-    m_rho <- NULL
-    for (m in 1:M) {
-        if (any(sort(Rho_m[m, ]) != 1:K))
-            m_rho <- c(m_rho, m)
-    }
-    non_perm_rate <- length(m_rho)/M
-
-    ##---------------------- step 5
-    ## Reordering of the draws of Mu, Eta and S using the matrix
-    ## Rho_m.  In this way, for the permutations, a unique labeling is
-    ## achieved.
-    Mu_reord <- array(0, dim = c(M, r, K))
-    Sigma2_reord <- array(0, dim = c(M, r, K))
-    Eta_reord <- matrix(0, M, K)
-    S_reord <- matrix(0, M, N)
-    
-    for (m in 2:M) {
-        Mu_reord[m, , Rho_m[m, ]] <- Mu[m, , ]
-        Sigma2_reord[m, , Rho_m[m, ]] <- Sigma2[m, , ]
-        Eta_reord[m, Rho_m[m, ]] <- Eta[m, ]
-        S_reord[m, ] <- Rho_m[m, ][S[m, ]] 
-    }
-    
-    ##---------------------- step 5
-    ## Finally, drop draws which are not permutations:
-    Mu_only_perm <- Mu_reord[setdiff(1:M, m_rho), , ]  # will discard any duplicated values from 1:M, i.e values of m_rho
-    Sigma2_only_perm <- Sigma2_reord[setdiff(1:M, m_rho), , ]  # will discard any duplicated values from 1:M, i.e values of m_rho
-    Eta_only_perm <- Eta_reord[setdiff(1:M, m_rho), ]
-    S_only_perm <- S_reord[setdiff(1:M, m_rho), ]
-    
-    return(list(S = S_only_perm,
-                Mu = Mu_only_perm,
-                Sigma2 = Sigma2_only_perm,
-                Eta = Eta_only_perm,
-                non_perm_rate = non_perm_rate))
-}
-