Supprimer identifyMixture.R
This commit is contained in:
parent
3b9f7af4da
commit
311526b597
|
@ -1,136 +0,0 @@
|
|||
#' Solve label switching and identify mixture.
|
||||
#'
|
||||
#' @description Clustering of the draws in the point process representation (PPR) using
|
||||
#' \eqn{k}-means clustering.
|
||||
#' @param Func A numeric array of dimension \eqn{M \times d \times K}; data for clustering in the PPR.
|
||||
#' @param Mu A numeric array of dimension \eqn{M \times r \times K}; draws of cluster means.
|
||||
#' @param Eta A numeric array of dimension \eqn{M \times K}; draws of cluster sizes.
|
||||
#' @param S A numeric matrix of dimension \eqn{M \times N}; draws of cluster assignments.
|
||||
#' @param Sigma2 A numeric array of dimension \eqn{M \times r \times K}; draws of cluster variances.
|
||||
#' @param centers An integer or a numeric matrix of dimension \eqn{K \times d}; used to initialize [stats::kmeans()].
|
||||
#' @return A named list containing:
|
||||
#' * `"S"`: reordered assignments.
|
||||
#' * `"Mu"`: reordered Mu matrix.
|
||||
#' * `"Eta"`: reordered weights.
|
||||
#' * `"Sigma2"`: reordered Sigma2 matrix.
|
||||
#' * `"non_perm_rate"`: proportion of draws where the clustering did not
|
||||
#' result in a permutation and hence no relabeling could be
|
||||
#' performed; this is the proportion of draws discarded.
|
||||
#'
|
||||
#' @details The following steps are implemented:
|
||||
#' * A functional of the draws of the component-specific
|
||||
#' parameters (`Func`) is passed to the function. The functionals
|
||||
#' of each component and iteration are stacked on top of each other in
|
||||
#' order to obtain a matrix where each row corresponds to the
|
||||
#' functional of one component.
|
||||
#' * The functionals are clustered into \eqn{K_+} clusters using \eqn{k}-means
|
||||
#' clustering. For each functional a group label is obtained.
|
||||
#' * The obtained labels of the functionals are used to construct
|
||||
#' a classification for each MCMC iteration. Those classifications
|
||||
#' which are a permutation of \eqn{(1,\ldots,K_+)} are used to reorder
|
||||
#' the Mu and Eta draws and the assignment matrix S. This results in an
|
||||
#' identified mixture model.
|
||||
#' * Note that only iterations resulting in permutations
|
||||
#' are used for parameter estimation and deriving the final
|
||||
#' partition. Those MCMC iterations where the obtained
|
||||
#' classifications of the functionals are not a permutation of
|
||||
#' \eqn{(1,\ldots,K_+)} are discarded as no unique assignment of functionals
|
||||
#' to components can be made. If the non-permutation rate, i.e. the
|
||||
#' proportion of MCMC iterations where the obtained classifications
|
||||
#' of the functionals are not a permutation, is high, this is an
|
||||
#' indication of a poor clustering solution, as the
|
||||
#' functionals are not clearly separated.
|
||||
#'
|
||||
identifyMixture <- function(Func, Mu, Eta, S, Sigma2, centers) {
|
||||
|
||||
# ## To be canceled:
|
||||
# Func <- Mu_Kplus
|
||||
# Mu <- Mu_Kplus
|
||||
# Eta <- Eta_Kplus
|
||||
# S <- S_Kplus
|
||||
# Sigma2 <- Sigma2_Kplus
|
||||
# centers <- Func_init
|
||||
# ##
|
||||
|
||||
K <- length(Func[1, 1, ])
|
||||
M <- length(Func[, 1, 1])
|
||||
d <- length(Func[1, , 1])
|
||||
r <- length(Mu[1, , 1])
|
||||
N <- length(S[1, ])
|
||||
|
||||
##---------------------- step 1
|
||||
## from the functional draws, a matrix KM of size (M*K)xr is
|
||||
## created, by putting the draws of the different clusters below
|
||||
## each other
|
||||
|
||||
KM <- do.call("rbind", lapply(1:K, function(k) matrix(Func[, , k, drop = TRUE], ncol = d)))
|
||||
# 'Func[, , k, drop = TRUE]' extracts the k-th matrix or slice from the 3D
|
||||
# array Func. The 'drop = TRUE' parameter is used to drop any dimensions with
|
||||
# a single level, ensuring that the result is a matrix.
|
||||
# 'do.call("rbind", ...)' takes the list of matrices generated by 'lapply'
|
||||
# and combines them row-wise into a single matrix. The 'rbind' function is
|
||||
# used to bind these matrices together by rows.
|
||||
colnames(KM) <- paste0("func", 1:d)
|
||||
|
||||
#
|
||||
# KSigma2 <- do.call("rbind", lapply(1:K, function(k) matrix(Sigma2[, , k, drop = TRUE], ncol = d)))
|
||||
|
||||
# colnames(KSigma2) <- paste0("func_sigma2", 1:d)
|
||||
|
||||
##---------------------- step 2
|
||||
## applying k-means clustering with known clusters
|
||||
## centers(=mu_func) and obtaining the classification 'class'
|
||||
|
||||
cl_y <- kmeans(KM, centers = centers, nstart = 30)
|
||||
class <- cl_y$cluster
|
||||
|
||||
##---------------------- step 3
|
||||
## classification matrix Rho_m is constructed: each row
|
||||
## corresponds to one MCMC iteration and contains the labels of
|
||||
## the group where the corresponding draw was assigned to by
|
||||
## kmeans.
|
||||
Rho_m <- NULL
|
||||
for (l in 0:(K - 1)) {
|
||||
Rho_m <- cbind(Rho_m, class[(l * M + 1):((l + 1) * M)])
|
||||
}
|
||||
|
||||
##---------------------- step 4
|
||||
## Identifying non-permutations, i.e. those rows where the
|
||||
## sequence of group labels is not a permutation of 1:K
|
||||
m_rho <- NULL
|
||||
for (m in 1:M) {
|
||||
if (any(sort(Rho_m[m, ]) != 1:K))
|
||||
m_rho <- c(m_rho, m)
|
||||
}
|
||||
non_perm_rate <- length(m_rho)/M
|
||||
|
||||
##---------------------- step 5
|
||||
## Reordering of the draws of Mu, Eta and S using the matrix
|
||||
## Rho_m. In this way, for the permutations, a unique labeling is
|
||||
## achieved.
|
||||
Mu_reord <- array(0, dim = c(M, r, K))
|
||||
Sigma2_reord <- array(0, dim = c(M, r, K))
|
||||
Eta_reord <- matrix(0, M, K)
|
||||
S_reord <- matrix(0, M, N)
|
||||
|
||||
for (m in 2:M) {
|
||||
Mu_reord[m, , Rho_m[m, ]] <- Mu[m, , ]
|
||||
Sigma2_reord[m, , Rho_m[m, ]] <- Sigma2[m, , ]
|
||||
Eta_reord[m, Rho_m[m, ]] <- Eta[m, ]
|
||||
S_reord[m, ] <- Rho_m[m, ][S[m, ]]
|
||||
}
|
||||
|
||||
##---------------------- step 5
|
||||
## Finally, drop draws which are not permutations:
|
||||
Mu_only_perm <- Mu_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho
|
||||
Sigma2_only_perm <- Sigma2_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho
|
||||
Eta_only_perm <- Eta_reord[setdiff(1:M, m_rho), ]
|
||||
S_only_perm <- S_reord[setdiff(1:M, m_rho), ]
|
||||
|
||||
return(list(S = S_only_perm,
|
||||
Mu = Mu_only_perm,
|
||||
Sigma2 = Sigma2_only_perm,
|
||||
Eta = Eta_only_perm,
|
||||
non_perm_rate = non_perm_rate))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user