Téléverser les fichiers vers "/"
This commit is contained in:
parent
df506c8331
commit
3b9f7af4da
252
Main_Monte_Carlo.R
Normal file
252
Main_Monte_Carlo.R
Normal file
|
@ -0,0 +1,252 @@
|
||||||
|
#install.packages("telescope")
|
||||||
|
#installed.packages("extraDistr")
|
||||||
|
#
|
||||||
|
rm(list = ls())
|
||||||
|
#
|
||||||
|
library("telescope")
|
||||||
|
library("stats")
|
||||||
|
library("extraDistr") # Package for the "dbnbinom" probability mass function
|
||||||
|
|
||||||
|
source("C:/Users/Anna SIMONI/CREST Dropbox/Anna Simoni/ANNA/Groupped_Panel_data/Simulations/Finite_Mixture/Code_for_our_paper/sampleUniNormMixture.R")
|
||||||
|
source("C:/Users/Anna SIMONI/CREST Dropbox/Anna Simoni/ANNA/Groupped_Panel_data/Simulations/Finite_Mixture/Code_for_our_paper/priorOnK_spec.R")
|
||||||
|
source("C:/Users/Anna SIMONI/CREST Dropbox/Anna Simoni/ANNA/Groupped_Panel_data/Simulations/Finite_Mixture/Code_for_our_paper/prior_alpha_e0.R")
|
||||||
|
source("C:/Users/Anna SIMONI/CREST Dropbox/Anna Simoni/ANNA/Groupped_Panel_data/Simulations/Finite_Mixture/Code_for_our_paper/sample_e0_alpha.R")
|
||||||
|
source("C:/Users/Anna SIMONI/CREST Dropbox/Anna Simoni/ANNA/Groupped_Panel_data/Simulations/Finite_Mixture/Code_for_our_paper/sampleK.R")
|
||||||
|
source("C:/Users/Anna SIMONI/CREST Dropbox/Anna Simoni/ANNA/Groupped_Panel_data/Simulations/Finite_Mixture/Code_for_our_paper/identifyMixture.R")
|
||||||
|
|
||||||
|
############################
|
||||||
|
## (1) Set the Parameters ##
|
||||||
|
############################
|
||||||
|
set.seed(123)
|
||||||
|
TT <- 3 # Number of time series observations
|
||||||
|
N <- 100 # Number of cross-section observations
|
||||||
|
MC <- 50 # Number of Monte Carlo iterations
|
||||||
|
#
|
||||||
|
sigma_Z <- 1 # standard deviation of the covariates Z
|
||||||
|
beta <- 0 # value of the common parameter
|
||||||
|
## Parameters of the mixture:
|
||||||
|
K <- 3 # Number of the mixture components
|
||||||
|
w_true <- c(1/3,1/3,1/3) # Vector of weights of each component of the mixture
|
||||||
|
alpha_true <- c(0,5,-5) # True values of the incidental parameter for the K mixture components
|
||||||
|
var_u <- c(1,1,1) # True values of the variance of the model error for the K mixture components
|
||||||
|
## Parameters of the MCMC sampler:
|
||||||
|
Mmax <- 10000 # the maximum number of iterations
|
||||||
|
thin <- 1 # the thinning imposed to reduce auto-correlation in the chain by only recording every `thined' observation
|
||||||
|
burnin <- 100 # the number of burn-in iterations not recorded
|
||||||
|
M <- Mmax/thin # the number of recorded observations.
|
||||||
|
|
||||||
|
Kmax <- 50 # the maximum number of components possible during sampling
|
||||||
|
Kinit <- 10 # the initial number of filled components
|
||||||
|
|
||||||
|
## Initial values:
|
||||||
|
# we use initial values corresponding to equal component sizes and half of the value of `C0` for the variances.
|
||||||
|
|
||||||
|
eta_0 <- rep(1/Kinit, Kinit)
|
||||||
|
r <- 1 # dimension
|
||||||
|
c0 <- 2 + (r-1)/2
|
||||||
|
g0 <- 0.2 + (r-1) / 2
|
||||||
|
|
||||||
|
# Initial value for beta
|
||||||
|
beta_0 <- beta
|
||||||
|
|
||||||
|
###############
|
||||||
|
## (2) Prior ##
|
||||||
|
###############
|
||||||
|
# Static specification for the weights with a fixed prior on $e_0$ where the value is set to 1.
|
||||||
|
G <- "MixStatic"
|
||||||
|
|
||||||
|
priorOn_n0 <- priorOnE0_spec("e0const", 1) # Prior on the hyperparameter of the Dirichlet prior
|
||||||
|
|
||||||
|
priorOn_K <- priorOnK_spec("BNB_143", 30) # Prior on K. This gives the function to compute log(PMF)-log(.5).
|
||||||
|
|
||||||
|
# Prior on the common coefficient beta: N(beta_prior,Omega_prior)
|
||||||
|
beta_prior <- 0
|
||||||
|
Omega_prior <- 1
|
||||||
|
|
||||||
|
# Initialization of matrices to store the results
|
||||||
|
theta_alpha <- array(,dim=c(M,Kmax,MC))
|
||||||
|
theta_Sigma2 <- array(,dim=c(M,Kmax,MC))
|
||||||
|
Beta <- matrix(,M,MC)
|
||||||
|
Eta <- array(,dim=c(M,Kmax,MC))
|
||||||
|
S_label <- array(,dim=c(M,N,MC))
|
||||||
|
Nk <- array(,dim=c(M,Kmax,MC))
|
||||||
|
K_MC <- matrix(,M,MC)
|
||||||
|
#
|
||||||
|
Kplus <- matrix(,M,MC)
|
||||||
|
Kplus_stat <- matrix(,MC,4)
|
||||||
|
K_stat <- matrix(,MC,5)
|
||||||
|
#
|
||||||
|
# Estimators
|
||||||
|
theta_alpha_hat <- matrix(,MC,Kmax)
|
||||||
|
theta_Sigma2_hat <- matrix(,MC,Kmax)
|
||||||
|
Eta_hat <- matrix(,MC,Kmax)
|
||||||
|
Beta_hat <- matrix(,MC,3)
|
||||||
|
|
||||||
|
#####################
|
||||||
|
## (3) Monte Carlo ##
|
||||||
|
#####################
|
||||||
|
for (mc in 1:MC){
|
||||||
|
#print(mc)
|
||||||
|
cat("mc = ",mc)
|
||||||
|
## (3.1) Simulate the Z and Y
|
||||||
|
#
|
||||||
|
ZZ <- rnorm(N*TT, mean = 0, sd = sigma_Z)
|
||||||
|
Z <- matrix(ZZ,TT,N)
|
||||||
|
|
||||||
|
rm(ZZ)
|
||||||
|
S_unif <- runif(N, min = 0, max = 1)
|
||||||
|
S <- matrix(,N,1) # S is the latent allocation variable
|
||||||
|
Y <- matrix(,TT,N)
|
||||||
|
|
||||||
|
for (ii in 1:N){
|
||||||
|
# Fill the latent allocation variable S
|
||||||
|
if (S_unif[ii] >= 0 & S_unif[ii] < w_true[1]) {
|
||||||
|
S[ii,1] <- 1
|
||||||
|
} else if (S_unif[ii] >= w_true[1] & S_unif[ii] < (w_true[1] + w_true[2])) {
|
||||||
|
S[ii,1] <- 2
|
||||||
|
} else {
|
||||||
|
S[ii,1] <- 3
|
||||||
|
}
|
||||||
|
u <- rnorm(TT, mean = 0, sd = sqrt(var_u[S[ii]]))
|
||||||
|
Y[,ii] <- alpha_true[S[ii]] + beta*Z[,ii] + u
|
||||||
|
}
|
||||||
|
|
||||||
|
rm(S,S_unif,u)
|
||||||
|
Y_mean <- colMeans(Y)
|
||||||
|
|
||||||
|
## (3.2) Initial values of the other parameters (not specified outside the loop):
|
||||||
|
# Component-specific priors on \alpha_k (i.e. N(b0,B0)) and \sigma_k^2 (i.e. IG(c_0,C_0)) following Richardson and Green (1997).
|
||||||
|
R <- diff(range(Y))
|
||||||
|
C0 <- diag(c(0.02*(R^2)), nrow = r)
|
||||||
|
sigma2_0 <- array(0, dim = c(1, 1, Kinit))
|
||||||
|
sigma2_0[1, 1, ] <- 0.5 * C0
|
||||||
|
G0 <- diag(10/(R^2), nrow = r)
|
||||||
|
B0 <- diag((R^2), nrow = r) # prior variance of alpha_k
|
||||||
|
b0 <- as.matrix((max(Y) + min(Y))/2, ncol = 1) # prior mean of alpha_k
|
||||||
|
|
||||||
|
## (3.3) Initial partition of the data and initial parameter values to start the MCMC.
|
||||||
|
# We use `kmeans()` to determine the initial partition $S_0$ and the initial component-specific means \mu_0.
|
||||||
|
|
||||||
|
cl_y <- kmeans(Y_mean, centers = Kinit, nstart = 30)
|
||||||
|
S_0 <- cl_y$cluster
|
||||||
|
alpha_0 <- t(cl_y$centers)
|
||||||
|
|
||||||
|
## (3.3) MCMC sampling
|
||||||
|
# We draw samples from the posterior using the telescoping sampler of Fruhwirth-Schnatter.
|
||||||
|
estGibbs <- sampleUniNormMixture(
|
||||||
|
Y, Z, S_0, alpha_0, sigma2_0, eta_0, beta_0,
|
||||||
|
c0, g0, G0, C0, b0, B0, beta_prior, Omega_prior,
|
||||||
|
M, burnin, thin, Kmax,
|
||||||
|
G, priorOn_K, priorOn_n0)
|
||||||
|
|
||||||
|
#The sampling function returns a named list where the sampled parameters and latent variables are contained.
|
||||||
|
theta_alpha[,,mc] <- estGibbs$Mu
|
||||||
|
theta_Sigma2[,,mc] <- estGibbs$Sigma2
|
||||||
|
Beta[,mc] <- estGibbs$Beta
|
||||||
|
Eta[,,mc] <- estGibbs$Eta
|
||||||
|
S_label[,,mc] <- estGibbs$S
|
||||||
|
Nk[,,mc] <- estGibbs$Nk
|
||||||
|
K_MC[,mc] <- estGibbs$K
|
||||||
|
Kplus[,mc] <- estGibbs$Kp
|
||||||
|
nonnormpost_mode_list <- estGibbs$nonnormpost_mode_list
|
||||||
|
acc <- estGibbs$acc
|
||||||
|
e0 <- estGibbs$e0
|
||||||
|
alpha_Dir <- estGibbs$alpha
|
||||||
|
|
||||||
|
#########################################
|
||||||
|
## Identification of the mixture model ##
|
||||||
|
#########################################
|
||||||
|
## Step 1: Estimating $K_+$ and $K$
|
||||||
|
## K_+ ##
|
||||||
|
Nk_mc <- Nk[,,mc]
|
||||||
|
Kplus_mc <- rowSums(Nk_mc != 0, na.rm = TRUE)
|
||||||
|
p_Kplus <- tabulate(Kplus_mc, nbins = max(Kplus_mc))/M
|
||||||
|
# The distribution of $K_+$ is characterized using the 1st and 3rd quartile as well as the median.
|
||||||
|
Kplus_stat[mc,1:3] <- quantile(Kplus_mc, probs = c(0.25, 0.5, 0.75))
|
||||||
|
|
||||||
|
# Point estimate for K_+ by taking the mode and determine the number of MCMC draws where exactly K_+
|
||||||
|
# components were filled.
|
||||||
|
Kplus_hat <- which.max(p_Kplus)
|
||||||
|
Kplus_stat[mc,4] <- Kplus_hat
|
||||||
|
M0 <- sum(Kplus_mc == Kplus_hat)
|
||||||
|
|
||||||
|
## K ##
|
||||||
|
# We also determine the posterior distribution of the number of components K directly drawn using the
|
||||||
|
# telescoping sampler.
|
||||||
|
K_mc <- K_MC[,mc]
|
||||||
|
p_K <- tabulate(K_mc, nbins = max(K_mc, na.rm = TRUE))/M
|
||||||
|
round(p_K[1:20], digits = 2)
|
||||||
|
|
||||||
|
# The posterior mode can be determined as well as the posterior mean and quantiles of the posterior.
|
||||||
|
K_hat <- which.max(tabulate(K_mc, nbins = max(K_mc)))
|
||||||
|
K_stat[mc,4] <- K_hat
|
||||||
|
K_stat[mc,5] <- mean(K_mc)
|
||||||
|
K_stat[mc,1:3] <- quantile(K_mc, probs = c(0.25, 0.5, 0.75))
|
||||||
|
|
||||||
|
## Step 2: Extracting the draws with exactly \hat{K}_+ non-empty components
|
||||||
|
# Select those draws where the number of filled components was exactly \hat{K}_+:
|
||||||
|
index <- Kplus_mc == Kplus_hat
|
||||||
|
Nk_mc[is.na(Nk_mc)] <- 0
|
||||||
|
Nk_Kplus <- (Nk_mc[index, ] > 0)
|
||||||
|
|
||||||
|
# We extract the cluster means, data cluster sizes and cluster assignments for the draws where exactly
|
||||||
|
# \hat{K}_+ components were filled.
|
||||||
|
Mu_inter <- estGibbs$Mu[index, , , drop = FALSE]
|
||||||
|
Mu_Kplus <- array(0, dim = c(M0, 1, Kplus_hat))
|
||||||
|
Mu_Kplus[, 1, ] <- Mu_inter[, 1, ][Nk_Kplus] # Here, 'Nk_Kplus' is used to subset the extracted
|
||||||
|
# elements from Mu_inter. It selects specific rows
|
||||||
|
# from the first column of Mu_inter based on the
|
||||||
|
# indices in Nk_Kplus.
|
||||||
|
|
||||||
|
Sigma2_inter <- estGibbs$Sigma2[index, , , drop = FALSE]
|
||||||
|
Sigma2_Kplus <- array(0, dim = c(M0, 1, Kplus_hat))
|
||||||
|
Sigma2_Kplus[, 1, ] <- Sigma2_inter[, 1, ][Nk_Kplus]
|
||||||
|
|
||||||
|
Eta_inter <- estGibbs$Eta[index, ]
|
||||||
|
Eta_Kplus <- matrix(Eta_inter[Nk_Kplus], ncol = Kplus_hat)
|
||||||
|
Eta_Kplus <- sweep(Eta_Kplus, 1, rowSums(Eta_Kplus), "/") # it normalizes the weights eta
|
||||||
|
|
||||||
|
w <- which(index)
|
||||||
|
S_Kplus <- matrix(0, M0, ncol(Y))
|
||||||
|
for (i in seq_along(w)) {
|
||||||
|
m <- w[i]
|
||||||
|
perm_S <- rep(0, Kmax)
|
||||||
|
perm_S[Nk_mc[m, ] != 0] <- 1:Kplus_hat # It assigns the sequence '1:Kplus_hat' to the positions in
|
||||||
|
# perm_S where Nk[m, ] is not zero.
|
||||||
|
S_Kplus[i, ] <- perm_S[S_label[m, ,mc]] # 'perm_S[S[m, ]]' indexes the vector 'perm_S' using the
|
||||||
|
# values in S[m, ]. This permutes or reassigns the values in
|
||||||
|
# 'S[m, ]' based on the mapping in 'perm_S'.
|
||||||
|
}
|
||||||
|
|
||||||
|
## Step 3: Clustering and relabeling of the MCMC draws in the point process representation
|
||||||
|
# For model identification, we cluster the draws of the means where exactly \hat{K}_+ components were
|
||||||
|
# filled in the point process representation using k-means clustering.
|
||||||
|
Func_init <- nonnormpost_mode_list[[Kplus_hat]]$mu
|
||||||
|
identified_Kplus <- identifyMixture(
|
||||||
|
Mu_Kplus, Mu_Kplus, Eta_Kplus, S_Kplus, Sigma2_Kplus, Func_init)
|
||||||
|
|
||||||
|
#A named list is returned which contains the proportion of draws where the clustering did not result in a
|
||||||
|
# permutation and hence no relabeling could be performed and the draws had to be omitted.
|
||||||
|
identified_Kplus$non_perm_rate
|
||||||
|
|
||||||
|
|
||||||
|
## Step 4: Estimating data cluster specific parameters and determining the final partition
|
||||||
|
# The relabeled draws are also returned which can be used to determine posterior mean values for data
|
||||||
|
# cluster specific parameters.
|
||||||
|
Mu_mean <- colMeans(identified_Kplus$Mu)
|
||||||
|
theta_alpha_hat[mc,1:length(Mu_mean)] <- colMeans(identified_Kplus$Mu)
|
||||||
|
theta_Sigma2_hat[mc,1:length(Mu_mean)] <- colMeans(identified_Kplus$Sigma2)
|
||||||
|
Eta_hat[mc,1:length(colMeans(identified_Kplus$Eta))] <- colMeans(identified_Kplus$Eta)
|
||||||
|
Beta_hat[mc,1] <- mean(Beta[,mc])
|
||||||
|
Beta_hat[mc,2:3] <- quantile(Beta[,mc], probs = c(0.025,0.975), na.rm =TRUE)
|
||||||
|
|
||||||
|
rm(R, C0, sigma2_0, G0, B0, b0, cl_y, S_0, alpha_0, estGibbs, nonnormpost_mode_list, acc, e0, alpha_Dir,
|
||||||
|
Kplus_mc, Nk_mc, p_Kplus, Kplus_hat, K_mc, index, Mu_inter, Mu_Kplus, Nk_Kplus, Sigma2_inter,
|
||||||
|
Sigma2_Kplus, Eta_inter, Eta_Kplus, w, S_Kplus, perm_S, Func_init, identified_Kplus, Mu_mean)
|
||||||
|
}
|
||||||
|
|
||||||
|
MSE_beta = mean(Beta_hat[,1] - beta)^2
|
||||||
|
MSE_alpha = colMeans((theta_alpha_hat[,1:3] - matrix(rep(alpha_true,MC), ncol=3, byrow=TRUE))^2)
|
||||||
|
MSE_eta = colMeans((Eta_hat[,1:3] - matrix(rep(w_true,MC), ncol=3, byrow=TRUE))^2)
|
||||||
|
|
||||||
|
|
136
identifyMixture.R
Normal file
136
identifyMixture.R
Normal file
|
@ -0,0 +1,136 @@
|
||||||
|
#' Solve label switching and identify mixture.
|
||||||
|
#'
|
||||||
|
#' @description Clustering of the draws in the point process representation (PPR) using
|
||||||
|
#' \eqn{k}-means clustering.
|
||||||
|
#' @param Func A numeric array of dimension \eqn{M \times d \times K}; data for clustering in the PPR.
|
||||||
|
#' @param Mu A numeric array of dimension \eqn{M \times r \times K}; draws of cluster means.
|
||||||
|
#' @param Eta A numeric array of dimension \eqn{M \times K}; draws of cluster sizes.
|
||||||
|
#' @param S A numeric matrix of dimension \eqn{M \times N}; draws of cluster assignments.
|
||||||
|
#' @param Sigma2 A numeric array of dimension \eqn{M \times r \times K}; draws of cluster variances.
|
||||||
|
#' @param centers An integer or a numeric matrix of dimension \eqn{K \times d}; used to initialize [stats::kmeans()].
|
||||||
|
#' @return A named list containing:
|
||||||
|
#' * `"S"`: reordered assignments.
|
||||||
|
#' * `"Mu"`: reordered Mu matrix.
|
||||||
|
#' * `"Eta"`: reordered weights.
|
||||||
|
#' * `"Sigma2"`: reordered Sigma2 matrix.
|
||||||
|
#' * `"non_perm_rate"`: proportion of draws where the clustering did not
|
||||||
|
#' result in a permutation and hence no relabeling could be
|
||||||
|
#' performed; this is the proportion of draws discarded.
|
||||||
|
#'
|
||||||
|
#' @details The following steps are implemented:
|
||||||
|
#' * A functional of the draws of the component-specific
|
||||||
|
#' parameters (`Func`) is passed to the function. The functionals
|
||||||
|
#' of each component and iteration are stacked on top of each other in
|
||||||
|
#' order to obtain a matrix where each row corresponds to the
|
||||||
|
#' functional of one component.
|
||||||
|
#' * The functionals are clustered into \eqn{K_+} clusters using \eqn{k}-means
|
||||||
|
#' clustering. For each functional a group label is obtained.
|
||||||
|
#' * The obtained labels of the functionals are used to construct
|
||||||
|
#' a classification for each MCMC iteration. Those classifications
|
||||||
|
#' which are a permutation of \eqn{(1,\ldots,K_+)} are used to reorder
|
||||||
|
#' the Mu and Eta draws and the assignment matrix S. This results in an
|
||||||
|
#' identified mixture model.
|
||||||
|
#' * Note that only iterations resulting in permutations
|
||||||
|
#' are used for parameter estimation and deriving the final
|
||||||
|
#' partition. Those MCMC iterations where the obtained
|
||||||
|
#' classifications of the functionals are not a permutation of
|
||||||
|
#' \eqn{(1,\ldots,K_+)} are discarded as no unique assignment of functionals
|
||||||
|
#' to components can be made. If the non-permutation rate, i.e. the
|
||||||
|
#' proportion of MCMC iterations where the obtained classifications
|
||||||
|
#' of the functionals are not a permutation, is high, this is an
|
||||||
|
#' indication of a poor clustering solution, as the
|
||||||
|
#' functionals are not clearly separated.
|
||||||
|
#'
|
||||||
|
identifyMixture <- function(Func, Mu, Eta, S, Sigma2, centers) {
|
||||||
|
|
||||||
|
# ## To be canceled:
|
||||||
|
# Func <- Mu_Kplus
|
||||||
|
# Mu <- Mu_Kplus
|
||||||
|
# Eta <- Eta_Kplus
|
||||||
|
# S <- S_Kplus
|
||||||
|
# Sigma2 <- Sigma2_Kplus
|
||||||
|
# centers <- Func_init
|
||||||
|
# ##
|
||||||
|
|
||||||
|
K <- length(Func[1, 1, ])
|
||||||
|
M <- length(Func[, 1, 1])
|
||||||
|
d <- length(Func[1, , 1])
|
||||||
|
r <- length(Mu[1, , 1])
|
||||||
|
N <- length(S[1, ])
|
||||||
|
|
||||||
|
##---------------------- step 1
|
||||||
|
## from the functional draws, a matrix KM of size (M*K)xr is
|
||||||
|
## created, by putting the draws of the different clusters below
|
||||||
|
## each other
|
||||||
|
|
||||||
|
KM <- do.call("rbind", lapply(1:K, function(k) matrix(Func[, , k, drop = TRUE], ncol = d)))
|
||||||
|
# 'Func[, , k, drop = TRUE]' extracts the k-th matrix or slice from the 3D
|
||||||
|
# array Func. The 'drop = TRUE' parameter is used to drop any dimensions with
|
||||||
|
# a single level, ensuring that the result is a matrix.
|
||||||
|
# 'do.call("rbind", ...)' takes the list of matrices generated by 'lapply'
|
||||||
|
# and combines them row-wise into a single matrix. The 'rbind' function is
|
||||||
|
# used to bind these matrices together by rows.
|
||||||
|
colnames(KM) <- paste0("func", 1:d)
|
||||||
|
|
||||||
|
#
|
||||||
|
# KSigma2 <- do.call("rbind", lapply(1:K, function(k) matrix(Sigma2[, , k, drop = TRUE], ncol = d)))
|
||||||
|
|
||||||
|
# colnames(KSigma2) <- paste0("func_sigma2", 1:d)
|
||||||
|
|
||||||
|
##---------------------- step 2
|
||||||
|
## applying k-means clustering with known clusters
|
||||||
|
## centers(=mu_func) and obtaining the classification 'class'
|
||||||
|
|
||||||
|
cl_y <- kmeans(KM, centers = centers, nstart = 30)
|
||||||
|
class <- cl_y$cluster
|
||||||
|
|
||||||
|
##---------------------- step 3
|
||||||
|
## classification matrix Rho_m is constructed: each row
|
||||||
|
## corresponds to one MCMC iteration and contains the labels of
|
||||||
|
## the group where the corresponding draw was assigned to by
|
||||||
|
## kmeans.
|
||||||
|
Rho_m <- NULL
|
||||||
|
for (l in 0:(K - 1)) {
|
||||||
|
Rho_m <- cbind(Rho_m, class[(l * M + 1):((l + 1) * M)])
|
||||||
|
}
|
||||||
|
|
||||||
|
##---------------------- step 4
|
||||||
|
## Identifying non-permutations, i.e. those rows where the
|
||||||
|
## sequence of group labels is not a permutation of 1:K
|
||||||
|
m_rho <- NULL
|
||||||
|
for (m in 1:M) {
|
||||||
|
if (any(sort(Rho_m[m, ]) != 1:K))
|
||||||
|
m_rho <- c(m_rho, m)
|
||||||
|
}
|
||||||
|
non_perm_rate <- length(m_rho)/M
|
||||||
|
|
||||||
|
##---------------------- step 5
|
||||||
|
## Reordering of the draws of Mu, Eta and S using the matrix
|
||||||
|
## Rho_m. In this way, for the permutations, a unique labeling is
|
||||||
|
## achieved.
|
||||||
|
Mu_reord <- array(0, dim = c(M, r, K))
|
||||||
|
Sigma2_reord <- array(0, dim = c(M, r, K))
|
||||||
|
Eta_reord <- matrix(0, M, K)
|
||||||
|
S_reord <- matrix(0, M, N)
|
||||||
|
|
||||||
|
for (m in 2:M) {
|
||||||
|
Mu_reord[m, , Rho_m[m, ]] <- Mu[m, , ]
|
||||||
|
Sigma2_reord[m, , Rho_m[m, ]] <- Sigma2[m, , ]
|
||||||
|
Eta_reord[m, Rho_m[m, ]] <- Eta[m, ]
|
||||||
|
S_reord[m, ] <- Rho_m[m, ][S[m, ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
##---------------------- step 5
|
||||||
|
## Finally, drop draws which are not permutations:
|
||||||
|
Mu_only_perm <- Mu_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho
|
||||||
|
Sigma2_only_perm <- Sigma2_reord[setdiff(1:M, m_rho), , ] # will discard any duplicated values from 1:M, i.e values of m_rho
|
||||||
|
Eta_only_perm <- Eta_reord[setdiff(1:M, m_rho), ]
|
||||||
|
S_only_perm <- S_reord[setdiff(1:M, m_rho), ]
|
||||||
|
|
||||||
|
return(list(S = S_only_perm,
|
||||||
|
Mu = Mu_only_perm,
|
||||||
|
Sigma2 = Sigma2_only_perm,
|
||||||
|
Eta = Eta_only_perm,
|
||||||
|
non_perm_rate = non_perm_rate))
|
||||||
|
}
|
||||||
|
|
144
priorOnK_spec.R
Normal file
144
priorOnK_spec.R
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
#' Specify prior on \eqn{K}.
|
||||||
|
#'
|
||||||
|
#' @description Obtain a function to evaluate the log prior
|
||||||
|
#' specified for \eqn{K}.
|
||||||
|
#' @param P A character indicating which specification should be
|
||||||
|
#' used. See Details for suitable values.
|
||||||
|
#' @param K A numeric or integer scalar specifying the fixed (if `P`
|
||||||
|
#' equals `"fixedK"`) or maximum value (if `P` equals `"Unif"`) of
|
||||||
|
#' \eqn{K}.
|
||||||
|
#' @return A named list containing:
|
||||||
|
#' * `"log_pK"`: a function of the log prior of \eqn{K}.
|
||||||
|
#' * `"param"`: a list with the parameters.
|
||||||
|
#'
|
||||||
|
#' @details
|
||||||
|
#' The following prior specifications are supported:
|
||||||
|
#' * `"fixedK"`: K has the fixed value K (second argument).
|
||||||
|
#' * `"Unif"`: \eqn{K \sim} Unif\eqn{[1,K]}, where the upper limit is given by K (second argument).
|
||||||
|
#' * `"BNB_111"`: \eqn{K-1 \sim} BNB(1,1,1), i.e., \eqn{K-1} follows a beta-negative binomial distribution with parameters \eqn{(1,1,1)}.
|
||||||
|
#' * `"BNB_121"`: \eqn{K-1 \sim} BNB(1,2,1), i.e., \eqn{K-1} follows a beta-negative binomial distribution with parameters \eqn{(1,2,1)}.
|
||||||
|
#' * `"BNB_143"`: \eqn{K-1 \sim} BNB(1,2,1), i.e., \eqn{K-1} follows a beta-negative binomial distribution with parameters \eqn{(1,4,3)}.
|
||||||
|
#' * `"BNB_443"`: \eqn{K-1 \sim} BNB(4,4,3), i.e., \eqn{K-1} follows a beta-negative binomial distribution with parameters \eqn{(4,4,3)}.
|
||||||
|
#' * `"BNB_943"`: \eqn{K-1 \sim} BNB(9,4,3), i.e., \eqn{K-1} follows a beta-negative binomial distribution with parameters \eqn{(9,4,3)}.
|
||||||
|
#' * `"Pois_1"`: \eqn{K-1 \sim} pois(1), i.e., \eqn{K-1} follows a Poisson distribution with rate 1.
|
||||||
|
#' * `"Pois_4"`: \eqn{K-1 \sim} pois(4), i.e., \eqn{K-1} follows a Poisson distribution with rate 4.
|
||||||
|
#' * `"Pois_9"`: \eqn{K-1 \sim} pois(9), i.e., \eqn{K-1} follows a Poisson distribution with rate 9.
|
||||||
|
#' * `"Geom_05"`: \eqn{K-1 \sim} geom(0.5), i.e., \eqn{K-1} follows a geometric distribution with success probability \eqn{p=0.5} and density \eqn{f(x)=p(1-p)^x}.
|
||||||
|
#' * `"Geom_02"`: \eqn{K-1 \sim} geom(0.2), i.e., \eqn{K-1} follows a geometric distribution with success probability \eqn{p=0.2} and density \eqn{f(x)=p(1-p)^x}.
|
||||||
|
#' * `"Geom_01"`: \eqn{K-1 \sim} geom(0.1), i.e., \eqn{K-1} follows a geometric distribution with success probability \eqn{p=0.1} and density \eqn{f(x)=p(1-p)^x}.
|
||||||
|
#' * `"NB_11"`: \eqn{K-1 \sim} nbinom(1,0.5), i.e., \eqn{K-1} follows a negative-binomial distribution with \eqn{size=1} and \eqn{p=0.5}.
|
||||||
|
#' * `"NB_41"`: \eqn{K-1 \sim} nbinom(4,0.5), i.e., \eqn{K-1} follows a negative-binomial distribution with \eqn{size=4} and \eqn{p=0.5}.
|
||||||
|
#' * `"NB_91"`: \eqn{K-1 \sim} nbinom(9,0.5), i.e., \eqn{K-1} follows a negative-binomial distribution with \eqn{size=9} and \eqn{p=0.5}.
|
||||||
|
#'
|
||||||
|
priorOnK_spec <- function(P = c("fixedK", "Unif",
|
||||||
|
"BNB_111", "BNB_121", "BNB_143", "BNB_443", "BNB_943",
|
||||||
|
"Pois_1", "Pois_4", "Pois_9",
|
||||||
|
"Geom_05", "Geom_02", "Geom_01",
|
||||||
|
"NB_11", "NB_41", "NB_91"), K) {
|
||||||
|
P <- match.arg(P)
|
||||||
|
if (P %in% c("fixedK", "Unif")) {
|
||||||
|
stopifnot(is.numeric(K), length(K) == 1, K >= 1)
|
||||||
|
K <- as.integer(K)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (P == "fixedK") {
|
||||||
|
param <- list(K_0 = K)
|
||||||
|
log_pK <-function (x) log(x == K)
|
||||||
|
}
|
||||||
|
if (P == "Unif") {
|
||||||
|
param <- list(Kmax = K)
|
||||||
|
log_pK <-function (x) log(1/K)
|
||||||
|
}
|
||||||
|
if (P == "BNB_111") {
|
||||||
|
alpha.B <- 1
|
||||||
|
a_pi <- 1
|
||||||
|
b_pi <- 1
|
||||||
|
param <- list(alpha.B = alpha.B, a_pi = a_pi, b_pi = b_pi)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dbnbinom(x, size = alpha.B, alpha = a_pi, beta = b_pi, log = TRUE) - log(0.5)
|
||||||
|
}
|
||||||
|
if (P == "BNB_212") {
|
||||||
|
alpha.B <- 2
|
||||||
|
a_pi <- 1
|
||||||
|
b_pi <- 2
|
||||||
|
param <- list(alpha.B = alpha.B, a_pi = a_pi, b_pi = b_pi)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dbnbinom(x-1, size = alpha.B, alpha = a_pi, beta = b_pi, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "BNB_143") {
|
||||||
|
alpha.B <- 1
|
||||||
|
a_pi <- 4
|
||||||
|
b_pi <- 3
|
||||||
|
param <- list(alpha.B = alpha.B, a_pi = a_pi, b_pi = b_pi)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dbnbinom(x-1, size = alpha.B, alpha = a_pi, beta = b_pi, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "BNB_443") {
|
||||||
|
alpha.B <- 4
|
||||||
|
a_pi <- 4
|
||||||
|
b_pi <- 3
|
||||||
|
param <- list(alpha.B = alpha.B, a_pi = a_pi, b_pi = b_pi)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dbnbinom(x-1, size = alpha.B, alpha = a_pi, beta = b_pi, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "BNB_943") {
|
||||||
|
alpha.B <- 9
|
||||||
|
a_pi <- 4
|
||||||
|
b_pi <- 3
|
||||||
|
param <- list(alpha.B = alpha.B, a_pi = a_pi, b_pi = b_pi)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dbnbinom(x-1, size = alpha.B, alpha = a_pi, beta = b_pi, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "Pois_1") {
|
||||||
|
lambda <- 1
|
||||||
|
param <- list(lambda = lambda)
|
||||||
|
log_pK <- function(x) dpois(x-1, lambda, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "Pois_4") {
|
||||||
|
lambda <- 4
|
||||||
|
param <- list(lambda = lambda)
|
||||||
|
log_pK <- function(x) dpois(x-1, lambda, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "Pois_9") {
|
||||||
|
lambda <- 9
|
||||||
|
param <- list(lambda = lambda)
|
||||||
|
log_pK <- function(x) dpois(x-1, lambda, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "Geom_05") {
|
||||||
|
p_geom <- 0.5
|
||||||
|
param <- list(p_geom = p_geom)
|
||||||
|
log_pK <- function(x) dgeom(x-1, p_geom, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "Geom_02") {
|
||||||
|
p_geom <- 0.2
|
||||||
|
param <- list(p_geom = p_geom)
|
||||||
|
log_pK <- function(x) dgeom(x-1, p_geom, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "Geom_01") {
|
||||||
|
p_geom <- 0.1
|
||||||
|
param <- list(p_geom = p_geom)
|
||||||
|
log_pK <- function(x) dgeom(x-1, p_geom, log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "NB_11") {
|
||||||
|
alpha.nb <- 1
|
||||||
|
beta.nb <- 1
|
||||||
|
param <- list(alpha.nb = alpha.nb, beta.nb = beta.nb)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dnbinom(x-1, size = alpha.nb, prob = beta.nb/(beta.nb+1), log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "NB_41") {
|
||||||
|
alpha.nb <- 4
|
||||||
|
beta.nb <- 1
|
||||||
|
param <- list(alpha.nb = alpha.nb, beta.nb = beta.nb)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dnbinom(x-1, size = alpha.nb, prob = beta.nb/(beta.nb+1), log = TRUE)
|
||||||
|
}
|
||||||
|
if (P == "NB_91") {
|
||||||
|
alpha.nb <- 9
|
||||||
|
beta.nb <- 1
|
||||||
|
param <- list(alpha.nb = alpha.nb, beta.nb = beta.nb)
|
||||||
|
log_pK <- function (x)
|
||||||
|
dnbinom(x-1, size = alpha.nb, prob = beta.nb/(beta.nb+1), log = TRUE)
|
||||||
|
}
|
||||||
|
return(list(log_pK = log_pK, param = param))
|
||||||
|
}
|
81
prior_alpha_e0.R
Normal file
81
prior_alpha_e0.R
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
#' Specify prior on \eqn{\alpha}.
|
||||||
|
#'
|
||||||
|
#' @description Obtain a function to evaluate the log prior specified
|
||||||
|
#' for \eqn{\alpha}.
|
||||||
|
#'
|
||||||
|
#' @param H A character indicating which specification should be used.
|
||||||
|
#' @return A named list containing:
|
||||||
|
#' * `"log_pAlpha"`: a function of the log prior of \eqn{\alpha}.
|
||||||
|
#' * `"param"`: a list with the parameters.
|
||||||
|
#' @details
|
||||||
|
#' The following prior specifications are supported:
|
||||||
|
#' * `"alpha_const"`: \eqn{\alpha} is fixed at 1.
|
||||||
|
#' * `"gam_05_05"`: \eqn{\alpha \sim} gamma(0.5, 0.5), i.e., shape = 0.5, rate = 0.5.
|
||||||
|
#' * `"gam_1_2"`: \eqn{\alpha \sim} gamma(1, 2), i.e., shape = 1, rate = 2.
|
||||||
|
#' * `"F_6_3"`: \eqn{\alpha \sim} F(6, 3), i.e., an F-distribution with degrees of freedom equal to 6 and 3.
|
||||||
|
#'
|
||||||
|
priorOnAlpha_spec <- function(H = c("alpha_const", "gam_05_05", "gam_1_2", "F_6_3")) {
|
||||||
|
H <- match.arg(H)
|
||||||
|
if (H == "alpha_const") {
|
||||||
|
param <- list(a_alpha = 1,
|
||||||
|
b_alpha = 1,
|
||||||
|
s0_proposal = 1.5)
|
||||||
|
param$alpha <- with(param, a_alpha / b_alpha)
|
||||||
|
log_pAlpha <- function(x) log(x == param$alpha)
|
||||||
|
}
|
||||||
|
if (H == "gam_05_05") {
|
||||||
|
param <- list(a_alpha = 0.5,
|
||||||
|
b_alpha = 0.5,
|
||||||
|
s0_proposal = 1.5)
|
||||||
|
param$alpha <- with(param, a_alpha / b_alpha)
|
||||||
|
log_pAlpha <- function(x)
|
||||||
|
dgamma(x, shape = param$a_alpha, rate = param$b_alpha, log = TRUE)
|
||||||
|
}
|
||||||
|
if (H == "gam_1_2") {
|
||||||
|
param <- list(a_alpha = 1,
|
||||||
|
b_alpha = 2,
|
||||||
|
s0_proposal = 1.5)
|
||||||
|
param$alpha <- with(param, a_alpha / b_alpha)
|
||||||
|
log_pAlpha <- function(x)
|
||||||
|
dgamma(x, shape = param$a_alpha, rate = param$b_alpha, log = TRUE)
|
||||||
|
}
|
||||||
|
if (H == "F_6_3") {
|
||||||
|
param <- list(a_alpha = 6,
|
||||||
|
b_alpha = 3,
|
||||||
|
alpha = 1,
|
||||||
|
s0_proposal = 2.5)
|
||||||
|
log_pAlpha <- function(x)
|
||||||
|
df(x, df1 = param$a_alpha, df2 = param$b_alpha, log = TRUE)
|
||||||
|
}
|
||||||
|
return(list(log_pAlpha = log_pAlpha,
|
||||||
|
param = param))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Specify prior on e0.
|
||||||
|
#'
|
||||||
|
#' @description Obtain a function to evaluate the log prior specified
|
||||||
|
#' for \eqn{e_0}.
|
||||||
|
#'
|
||||||
|
#' @param E A character indicating which specification should be used.
|
||||||
|
#' @param e0 A numeric scalar giving the fixed value of \eqn{e_0}.
|
||||||
|
#' @return A named list containing:
|
||||||
|
#' * `"log_p_e0"`: a function of the log prior of \eqn{e_0}.
|
||||||
|
#' * `"param"`: a list with the parameters.
|
||||||
|
#' @details
|
||||||
|
#' The following prior specifications are supported:
|
||||||
|
#' * `"G_1_20"`: \eqn{e_0 \sim} gamma(1, 20), i.e., shape = 1, rate = 20.
|
||||||
|
#' * `"e0const"`: \eqn{e_0} is fixed at `e0`.
|
||||||
|
priorOnE0_spec <- function(E = c("G_1_20", "e0const"), e0) {
|
||||||
|
E <- match.arg(E)
|
||||||
|
param <- list(b_alpha = 1,
|
||||||
|
e0 = e0,
|
||||||
|
s0_proposal = 1.5)
|
||||||
|
if (E == "G_1_20") {
|
||||||
|
log_p_e0 <- function(x)
|
||||||
|
dgamma(x, shape = 1, rate = 20, log = TRUE)
|
||||||
|
}
|
||||||
|
if (E == "e0const") {
|
||||||
|
log_p_e0 <- function(x) log(x == e0)
|
||||||
|
}
|
||||||
|
return(list(log_p_e0 = log_p_e0, param = param))
|
||||||
|
}
|
68
sample_e0_alpha.R
Normal file
68
sample_e0_alpha.R
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
#' Sample alpha conditional on partition and K using an
|
||||||
|
#' Metropolis-Hastings step with log-normal proposal.
|
||||||
|
#'
|
||||||
|
#' @description Sample \eqn{\alpha} conditional on the current
|
||||||
|
#' partition and value of \eqn{K} using an Metropolis-Hastings
|
||||||
|
#' step with log-normal proposal.
|
||||||
|
|
||||||
|
#' @param N A number; indicating the sample size.
|
||||||
|
#' @param Nk An integer vector; indicating the group sizes in the partition.
|
||||||
|
#' @param K A number; indicating the number of components.
|
||||||
|
#' @param alpha A numeric value; indicating the value for \eqn{\alpha}.
|
||||||
|
#' @param s0_proposal A numeric value; indicating the standard deviation of the random walk.
|
||||||
|
#' @param log_pAlpha A function; evaluating the log prior of \eqn{\alpha}.
|
||||||
|
#' @return A named list containing:
|
||||||
|
#' * `"alpha"`: a numeric, the new \eqn{\alpha} value.
|
||||||
|
#' * `"acc"`: logical indicating acceptance.
|
||||||
|
|
||||||
|
sampleAlpha <- function(N, Nk, K, alpha, s0_proposal, log_pAlpha) {
|
||||||
|
log_post_alpha <- function(x)
|
||||||
|
lgamma(x) - lgamma(N+x) + sum(lgamma(Nk+x/K) - lgamma(x/K)) +
|
||||||
|
log_pAlpha(x)
|
||||||
|
lalpha_p <- log(alpha) + rnorm(1, 0, s0_proposal)
|
||||||
|
alpha_p <- exp(lalpha_p)
|
||||||
|
lalpha1 <- log_post_alpha(alpha_p) - log_post_alpha(alpha) +
|
||||||
|
log(alpha_p) - log(alpha)
|
||||||
|
alpha1 <- exp(lalpha1)
|
||||||
|
acc <- FALSE
|
||||||
|
if (runif(1) <= alpha1) {
|
||||||
|
alpha <- alpha_p
|
||||||
|
acc <- TRUE
|
||||||
|
}
|
||||||
|
return(list(alpha = alpha, acc = acc))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Sample e0 conditional on partition and K using an
|
||||||
|
#' Metropolis-Hastings step with log-normal proposal.
|
||||||
|
#'
|
||||||
|
#' @description Sample \eqn{e_0} conditional on the current partition
|
||||||
|
#' and value of \eqn{K} using an Metropolis-Hastings step with
|
||||||
|
#' log-normal proposal.
|
||||||
|
#'
|
||||||
|
#' @param K A number; indicating the number of components.
|
||||||
|
#' @param Kp A number; indicating the number of filled components \eqn{K_+}.
|
||||||
|
#' @param N A number; indicating the sample size.
|
||||||
|
#' @param Nk An integer vector; indicating the group sizes in the partition.
|
||||||
|
#' @param s0_proposal A numeric value; indicating the standard deviation of the random walk proposal.
|
||||||
|
#' @param e0 A numeric value; indicating the current value of \eqn{e_0}.
|
||||||
|
#' @param log_p_e0 A function; evaluating the log prior of \eqn{e_0}.
|
||||||
|
#' @return A named list containing:
|
||||||
|
#' * `"e0"`: a numeric, the new \eqn{e_0} value.
|
||||||
|
#' * `"acc"`: logical indicating acceptance.
|
||||||
|
|
||||||
|
sampleE0 <- function(K, Kp, N, Nk, s0_proposal, e0, log_p_e0) {
|
||||||
|
log_post_e0 <- function(x)
|
||||||
|
lgamma(K+1) - lgamma(K+1-Kp) + lgamma(K*x) -
|
||||||
|
lgamma(N+K*x) + sum(lgamma(Nk+x) - lgamma(x)) +
|
||||||
|
log_p_e0(x)
|
||||||
|
le0_p <- log(e0) + rnorm(1, 0, s0_proposal)
|
||||||
|
e0_p <- exp(le0_p)
|
||||||
|
lalpha1 <- log_post_e0(e0_p) - log_post_e0(e0) + log(e0_p) - log(e0)
|
||||||
|
alpha1 <- min(exp(lalpha1), 1)
|
||||||
|
acc <- FALSE
|
||||||
|
if (runif(1) <= alpha1) {
|
||||||
|
e0 <- e0_p
|
||||||
|
acc <- TRUE
|
||||||
|
}
|
||||||
|
return(list(e0 = e0, acc = acc))
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user