Supprimer Simulations/identifyMixture.R
This commit is contained in:
parent
a95196ad1d
commit
cf99b484bd
|
@ -1,136 +0,0 @@
|
||||||
#' Solve label switching and identify mixture.
|
|
||||||
#'
|
|
||||||
#' @description Clustering of the draws in the point process representation (PPR) using
|
|
||||||
#' \eqn{k}-means clustering.
|
|
||||||
#' @param Func A numeric array of dimension \eqn{M \times d \times K}; data for clustering in the PPR.
|
|
||||||
#' @param Mu A numeric array of dimension \eqn{M \times r \times K}; draws of cluster means.
|
|
||||||
#' @param Eta A numeric array of dimension \eqn{M \times K}; draws of cluster sizes.
|
|
||||||
#' @param S A numeric matrix of dimension \eqn{M \times N}; draws of cluster assignments.
|
|
||||||
#' @param Sigma2 A numeric array of dimension \eqn{M \times r \times K}; draws of cluster variances.
|
|
||||||
#' @param centers An integer or a numeric matrix of dimension \eqn{K \times d}; used to initialize [stats::kmeans()].
|
|
||||||
#' @return A named list containing:
|
|
||||||
#' * `"S"`: reordered assignments.
|
|
||||||
#' * `"Mu"`: reordered Mu matrix.
|
|
||||||
#' * `"Eta"`: reordered weights.
|
|
||||||
#' * `"Sigma2"`: reordered Sigma2 matrix.
|
|
||||||
#' * `"non_perm_rate"`: proportion of draws where the clustering did not
|
|
||||||
#' result in a permutation and hence no relabeling could be
|
|
||||||
#' performed; this is the proportion of draws discarded.
|
|
||||||
#'
|
|
||||||
#' @details The following steps are implemented:
|
|
||||||
#' * A functional of the draws of the component-specific
|
|
||||||
#' parameters (`Func`) is passed to the function. The functionals
|
|
||||||
#' of each component and iteration are stacked on top of each other in
|
|
||||||
#' order to obtain a matrix where each row corresponds to the
|
|
||||||
#' functional of one component.
|
|
||||||
#' * The functionals are clustered into \eqn{K_+} clusters using \eqn{k}-means
|
|
||||||
#' clustering. For each functional a group label is obtained.
|
|
||||||
#' * The obtained labels of the functionals are used to construct
|
|
||||||
#' a classification for each MCMC iteration. Those classifications
|
|
||||||
#' which are a permutation of \eqn{(1,\ldots,K_+)} are used to reorder
|
|
||||||
#' the Mu and Eta draws and the assignment matrix S. This results in an
|
|
||||||
#' identified mixture model.
|
|
||||||
#' * Note that only iterations resulting in permutations
|
|
||||||
#' are used for parameter estimation and deriving the final
|
|
||||||
#' partition. Those MCMC iterations where the obtained
|
|
||||||
#' classifications of the functionals are not a permutation of
|
|
||||||
#' \eqn{(1,\ldots,K_+)} are discarded as no unique assignment of functionals
|
|
||||||
#' to components can be made. If the non-permutation rate, i.e. the
|
|
||||||
#' proportion of MCMC iterations where the obtained classifications
|
|
||||||
#' of the functionals are not a permutation, is high, this is an
|
|
||||||
#' indication of a poor clustering solution, as the
|
|
||||||
#' functionals are not clearly separated.
|
|
||||||
#'
|
|
||||||
identifyMixture <- function(Func, Mu, Eta, S, Sigma2, centers) {
|
|
||||||
|
|
||||||
# ## To be canceled:
|
|
||||||
# Func <- Mu_Kplus
|
|
||||||
# Mu <- Mu_Kplus
|
|
||||||
# Eta <- Eta_Kplus
|
|
||||||
# S <- S_Kplus
|
|
||||||
# Sigma2 <- Sigma2_Kplus
|
|
||||||
# centers <- Func_init
|
|
||||||
# ##
|
|
||||||
|
|
||||||
K <- length(Func[1, 1, ])
|
|
||||||
M <- length(Func[, 1, 1])
|
|
||||||
d <- length(Func[1, , 1])
|
|
||||||
r <- length(Mu[1, , 1])
|
|
||||||
N <- length(S[1, ])
|
|
||||||
|
|
||||||
##---------------------- step 1
|
|
||||||
## from the functional draws, a matrix KM of size (M*K)xr is
|
|
||||||
## created, by putting the draws of the different clusters below
|
|
||||||
## each other
|
|
||||||
|
|
||||||
KM <- do.call("rbind", lapply(1:K, function(k) matrix(Func[, , k, drop = TRUE], ncol = d)))
|
|
||||||
# 'Func[, , k, drop = TRUE]' extracts the k-th matrix or slice from the 3D
|
|
||||||
# array Func. The 'drop = TRUE' parameter is used to drop any dimensions with
|
|
||||||
# a single level, ensuring that the result is a matrix.
|
|
||||||
# 'do.call("rbind", ...)' takes the list of matrices generated by 'lapply'
|
|
||||||
# and combines them row-wise into a single matrix. The 'rbind' function is
|
|
||||||
# used to bind these matrices together by rows.
|
|
||||||
colnames(KM) <- paste0("func", 1:d)
|
|
||||||
|
|
||||||
#
|
|
||||||
# KSigma2 <- do.call("rbind", lapply(1:K, function(k) matrix(Sigma2[, , k, drop = TRUE], ncol = d)))
|
|
||||||
|
|
||||||
# colnames(KSigma2) <- paste0("func_sigma2", 1:d)
|
|
||||||
|
|
||||||
##---------------------- step 2
|
|
||||||
## applying k-means clustering with known clusters
|
|
||||||
## centers(=mu_func) and obtaining the classification 'class'
|
|
||||||
|
|
||||||
cl_y <- kmeans(KM, centers = centers, nstart = 30)
|
|
||||||
class <- cl_y$cluster
|
|
||||||
|
|
||||||
##---------------------- step 3
|
|
||||||
## classification matrix Rho_m is constructed: each row
|
|
||||||
## corresponds to one MCMC iteration and contains the labels of
|
|
||||||
## the group where the corresponding draw was assigned to by
|
|
||||||
## kmeans.
|
|
||||||
Rho_m <- NULL
|
|
||||||
for (l in 0:(K - 1)) {
|
|
||||||
Rho_m <- cbind(Rho_m, class[(l * M + 1):((l + 1) * M)])
|
|
||||||
}
|
|
||||||
|
|
||||||
##---------------------- step 4
|
|
||||||
## Identifying non-permutations, i.e. those rows where the
|
|
||||||
## sequence of group labels is not a permutation of 1:K
|
|
||||||
m_rho <- NULL
|
|
||||||
for (m in 1:M) {
|
|
||||||
if (any(sort(Rho_m[m, ]) != 1:K))
|
|
||||||
m_rho <- c(m_rho, m)
|
|
||||||
}
|
|
||||||
non_perm_rate <- length(m_rho)/M
|
|
||||||
|
|
||||||
##---------------------- step 5
|
|
||||||
## Reordering of the draws of Mu, Eta and S using the matrix
|
|
||||||
## Rho_m. In this way, for the permutations, a unique labeling is
|
|
||||||
## achieved.
|
|
||||||
Mu_reord <- array(0, dim = c(M, r, K))
|
|
||||||
Sigma2_reord <- array(0, dim = c(M, r, K))
|
|
||||||
Eta_reord <- matrix(0, M, K)
|
|
||||||
S_reord <- matrix(0, M, N)
|
|
||||||
|
|
||||||
for (m in 2:M) {
|
|
||||||
Mu_reord[m, , Rho_m[m, ]] <- Mu[m, , ]
|
|
||||||
Sigma2_reord[m, , Rho_m[m, ]] <- Sigma2[m, , ]
|
|
||||||
Eta_reord[m, Rho_m[m, ]] <- Eta[m, ]
|
|
||||||
S_reord[m, ] <- Rho_m[m, ][S[m, ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
##---------------------- step 5
|
|
||||||
## Finally, drop draws which are not permutations:
|
|
||||||
Mu_only_perm <- Mu_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho
|
|
||||||
Sigma2_only_perm <- Sigma2_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho
|
|
||||||
Eta_only_perm <- Eta_reord[setdiff(1:M, m_rho), ]
|
|
||||||
S_only_perm <- S_reord[setdiff(1:M, m_rho), ]
|
|
||||||
|
|
||||||
return(list(S = S_only_perm,
|
|
||||||
Mu = Mu_only_perm,
|
|
||||||
Sigma2 = Sigma2_only_perm,
|
|
||||||
Eta = Eta_only_perm,
|
|
||||||
non_perm_rate = non_perm_rate))
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user