diff --git a/Simulations/identifyMixture.R b/Simulations/identifyMixture.R deleted file mode 100644 index 8e040c5..0000000 --- a/Simulations/identifyMixture.R +++ /dev/null @@ -1,136 +0,0 @@ -#' Solve label switching and identify mixture. -#' -#' @description Clustering of the draws in the point process representation (PPR) using -#' \eqn{k}-means clustering. -#' @param Func A numeric array of dimension \eqn{M \times d \times K}; data for clustering in the PPR. -#' @param Mu A numeric array of dimension \eqn{M \times r \times K}; draws of cluster means. -#' @param Eta A numeric array of dimension \eqn{M \times K}; draws of cluster sizes. -#' @param S A numeric matrix of dimension \eqn{M \times N}; draws of cluster assignments. -#' @param Sigma2 A numeric array of dimension \eqn{M \times r \times K}; draws of cluster variances. -#' @param centers An integer or a numeric matrix of dimension \eqn{K \times d}; used to initialize [stats::kmeans()]. -#' @return A named list containing: -#' * `"S"`: reordered assignments. -#' * `"Mu"`: reordered Mu matrix. -#' * `"Eta"`: reordered weights. -#' * `"Sigma2"`: reordered Sigma2 matrix. -#' * `"non_perm_rate"`: proportion of draws where the clustering did not -#' result in a permutation and hence no relabeling could be -#' performed; this is the proportion of draws discarded. -#' -#' @details The following steps are implemented: -#' * A functional of the draws of the component-specific -#' parameters (`Func`) is passed to the function. The functionals -#' of each component and iteration are stacked on top of each other in -#' order to obtain a matrix where each row corresponds to the -#' functional of one component. -#' * The functionals are clustered into \eqn{K_+} clusters using \eqn{k}-means -#' clustering. For each functional a group label is obtained. -#' * The obtained labels of the functionals are used to construct -#' a classification for each MCMC iteration. Those classifications -#' which are a permutation of \eqn{(1,\ldots,K_+)} are used to reorder -#' the Mu and Eta draws and the assignment matrix S. This results in an -#' identified mixture model. -#' * Note that only iterations resulting in permutations -#' are used for parameter estimation and deriving the final -#' partition. Those MCMC iterations where the obtained -#' classifications of the functionals are not a permutation of -#' \eqn{(1,\ldots,K_+)} are discarded as no unique assignment of functionals -#' to components can be made. If the non-permutation rate, i.e. the -#' proportion of MCMC iterations where the obtained classifications -#' of the functionals are not a permutation, is high, this is an -#' indication of a poor clustering solution, as the -#' functionals are not clearly separated. -#' -identifyMixture <- function(Func, Mu, Eta, S, Sigma2, centers) { - - # ## To be canceled: - # Func <- Mu_Kplus - # Mu <- Mu_Kplus - # Eta <- Eta_Kplus - # S <- S_Kplus - # Sigma2 <- Sigma2_Kplus - # centers <- Func_init - # ## - - K <- length(Func[1, 1, ]) - M <- length(Func[, 1, 1]) - d <- length(Func[1, , 1]) - r <- length(Mu[1, , 1]) - N <- length(S[1, ]) - - ##---------------------- step 1 - ## from the functional draws, a matrix KM of size (M*K)xr is - ## created, by putting the draws of the different clusters below - ## each other - - KM <- do.call("rbind", lapply(1:K, function(k) matrix(Func[, , k, drop = TRUE], ncol = d))) - # 'Func[, , k, drop = TRUE]' extracts the k-th matrix or slice from the 3D - # array Func. The 'drop = TRUE' parameter is used to drop any dimensions with - # a single level, ensuring that the result is a matrix. - # 'do.call("rbind", ...)' takes the list of matrices generated by 'lapply' - # and combines them row-wise into a single matrix. The 'rbind' function is - # used to bind these matrices together by rows. - colnames(KM) <- paste0("func", 1:d) - - # -# KSigma2 <- do.call("rbind", lapply(1:K, function(k) matrix(Sigma2[, , k, drop = TRUE], ncol = d))) - -# colnames(KSigma2) <- paste0("func_sigma2", 1:d) - - ##---------------------- step 2 - ## applying k-means clustering with known clusters - ## centers(=mu_func) and obtaining the classification 'class' - - cl_y <- kmeans(KM, centers = centers, nstart = 30) - class <- cl_y$cluster - - ##---------------------- step 3 - ## classification matrix Rho_m is constructed: each row - ## corresponds to one MCMC iteration and contains the labels of - ## the group where the corresponding draw was assigned to by - ## kmeans. - Rho_m <- NULL - for (l in 0:(K - 1)) { - Rho_m <- cbind(Rho_m, class[(l * M + 1):((l + 1) * M)]) - } - - ##---------------------- step 4 - ## Identifying non-permutations, i.e. those rows where the - ## sequence of group labels is not a permutation of 1:K - m_rho <- NULL - for (m in 1:M) { - if (any(sort(Rho_m[m, ]) != 1:K)) - m_rho <- c(m_rho, m) - } - non_perm_rate <- length(m_rho)/M - - ##---------------------- step 5 - ## Reordering of the draws of Mu, Eta and S using the matrix - ## Rho_m. In this way, for the permutations, a unique labeling is - ## achieved. - Mu_reord <- array(0, dim = c(M, r, K)) - Sigma2_reord <- array(0, dim = c(M, r, K)) - Eta_reord <- matrix(0, M, K) - S_reord <- matrix(0, M, N) - - for (m in 2:M) { - Mu_reord[m, , Rho_m[m, ]] <- Mu[m, , ] - Sigma2_reord[m, , Rho_m[m, ]] <- Sigma2[m, , ] - Eta_reord[m, Rho_m[m, ]] <- Eta[m, ] - S_reord[m, ] <- Rho_m[m, ][S[m, ]] - } - - ##---------------------- step 5 - ## Finally, drop draws which are not permutations: - Mu_only_perm <- Mu_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho - Sigma2_only_perm <- Sigma2_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho - Eta_only_perm <- Eta_reord[setdiff(1:M, m_rho), ] - S_only_perm <- S_reord[setdiff(1:M, m_rho), ] - - return(list(S = S_only_perm, - Mu = Mu_only_perm, - Sigma2 = Sigma2_only_perm, - Eta = Eta_only_perm, - non_perm_rate = non_perm_rate)) -} -