####################MCMC for GMM with one dimensions and four components######## #### libraries ######## library(stats) library(MASS) library(mvtnorm) library(MCMCpack) ##################### num = 50 ##number of obervations #the mean of each components mu1=-3 mu2=0 mu3=3 mu4=6 sd = 0.55 #the sd for the components x<- NULL # for real data z<- NULL # for component identity ####generate the "observed" data for(i in 1:num){ u=runif(1) if(u<=1/4){ x<- c(x,rnorm(1,mu1,sd)) z<- c(z, 1); } if (u>1/4 && u<=1/2) { x<- c(x,rnorm(1,mu2,sd)) z<- c(z, 2); } if (u>1/2 && u<=3/4) { x<- c(x,rnorm(1,mu3,sd)) z<- c(z, 3); } if(u>3/4 ) { x<- c(x,rnorm(1,mu4,sd)) z<- c(z, 4); } } ########The MCMC start from here ############## mlist<-c(runif(1), runif(1), runif(1), runif(1)) ##sigma = 1; zlist = rep(0, num); alpha <- 0.001 beta <- 0.001 sigma <- 100 pmean <-6 pvar<-100 mu1list<-NULL; mu2list<-NULL; mu3list<-NULL; mu4list<-NULL; w1list <- NULL; w2list <- NULL; w3list <- NULL; w4list <- NULL; sigmalist<-NULL; w<-c(1/4,1/4,1/4,1/4); M=20000 B=2000 for( k in 1:M){ #update the zlist prob1 = (w[1])*dnorm(x, mlist[1] , sqrt(sigma)) prob2 = (w[2])*dnorm(x, mlist[2] , sqrt(sigma)) prob3 = (w[3])*dnorm(x, mlist[3] , sqrt(sigma)) prob4 = (w[4])*dnorm(x, mlist[4] , sqrt(sigma)) sumprob = prob1+prob2+prob3+prob4 #sampling and update zlist for (i in 1:num) zlist[i] = sample(1:4,size=1, prob=c(prob1[i]/sumprob[i],prob2[i]/sumprob[i], prob3[i]/sumprob[i], prob4[i]/sumprob[i])) #update the sigma sigma = 1/rgamma(1,alpha+num/2,beta+sum((x-mlist[zlist])^2)/2) sigmalist<-c(sigmalist, sigma); #update mu1 numOfgroup1 = sum( zlist==1 ); numOfgroup2 = sum( zlist==2 ); numOfgroup3 = sum( zlist==3 ); numOfgroup4 = sum( zlist==4 ); var = 1/(1/pvar+ numOfgroup1/sigma) mean = var*( pmean/pvar+sum(x[zlist==1])/sigma) mlist[1] = rnorm(1,mean,sqrt(var)) var = 1/(1/pvar+ numOfgroup2/sigma) mean = var*( pmean/pvar+sum(x[zlist==2])/sigma) mlist[2] = rnorm(1,mean,sqrt(var)) var = 1/(1/pvar+ numOfgroup3/sigma) mean = var*( pmean/pvar+sum(x[zlist==3])/sigma) mlist[3] = rnorm(1,mean,sqrt(var)) var = 1/(1/pvar+ numOfgroup4/sigma) mean = var*( pmean/pvar+sum(x[zlist==4])/sigma) mlist[4] = rnorm(1,mean,sqrt(var)) ###perform Ramdom Permutation switch###### swith <- sample(1:4); temp1 <- mlist[swith[1]]; temp2 <- mlist[swith[2]]; temp3 <- mlist[swith[3]]; temp4 <- mlist[swith[4]]; mlist[1]<-temp1; mlist[2]<-temp2; mlist[3]<-temp3; mlist[4]<-temp4; mu1list<-c(mu1list, mlist[1]) mu2list <- c(mu2list, mlist[2]) mu3list<-c(mu3list, mlist[3]) mu4list <- c(mu4list, mlist[4]) mu1list<-c(mu1list, mlist[1]) mu2list <- c(mu2list, mlist[2]); mu3list<-c(mu3list, mlist[3]) mu4list <- c(mu4list, mlist[4]); #update w1 w = rdirichlet( 1, c(1+numOfgroup1, 1+numOfgroup2, 1+numOfgroup3, 1+numOfgroup4) ) w1list<-c(w1list, w[1]); w2list<-c(w2list, w[2]); w3list<-c(w3list, w[3]); w4list<-c(w4list, w[4]); } cat("complete MCMC"); ##re-sample 2000 samples from MCMC data for re-labeling newM<-2000; ###Q matrix #Pij<-matrix(0, num, 4); pij<-array(1/3, c(newM,24, num,4)); res<-sample(B:M, newM, replace=FALSE); meanlist<-cbind(mu1list[res], mu2list[res], mu3list[res], mu4list[res]); pilist <- cbind(w1list[res], w2list[res], w3list[res], w4list[res]); #####all the perm<-rbind(c(1,2,3,4), c(1,3,2,4), c(2,1,3,4),c(2,3,1,4),c(3,1,2,4),c(3,2,1,4), c(1,2,4,3), c(1,3,4,2), c(2,1,4,3),c(2,3,4,1),c(3,1,4,2),c(3,2,4,1), c(1,4,2,3), c(1,4,3,2), c(2,4,1,3),c(2,4,3,1),c(3,4,1,2),c(3,4,2,1), c(4,1,2,3), c(4,1,3,2), c(4,2,1,3),c(4,2,3,1),c(4,3,1,2),c(4,3,2,1) ); pmatrix=function(i,j,t,m){ a=0 for (k in 1:4) { a = a+pilist[m,k] * dnorm(x[i], meanlist[m,perm[t,k]] , sqrt(sigmalist[m]) ); } pilist[m,j]*dnorm(x[i], meanlist[m,perm[t,j]],sqrt(sigmalist[m]) )/a } minpij<-function(s, t, Qij){ # sum( pij[s, t,,]*log(pij[s,t,,]/Qij)) sum( exp(pij[s, t,,]) + pij[s,t,,]/Qij ); } ######compute the pij matrix############################################ for (m in 1:newM) { for (t in 1:24 ){ for (i in 1:num){ for (j in 1:4){ pij[m,t,i,j]<-pmatrix(i,j,t,m); } } } #end of m for loop } #end of p for loop cat("complete pij " , '\n'); oldperm<-rep(1,newM); newperm<-rep(1,newM); exi = 0; while (exi != 1 ){ cat("loop " , '\n'); ##update Q Q=matrix(0, num,4); for( m in 1:newM){ Q<-pij[m,oldperm[m],,]+Q; } Q<-Q/M; ###choose permutation for( s in 1:newM){ ##loop through MCMC samples minlist<-NULL; for(t in 1:24){ #for each permutation, find the pij value minlist<-c(minlist, minpij( s, t, Q) ); } mindex <- which.min(minlist); newperm[s]<-mindex; } if( sum(oldperm==newperm)>newM-10 ) {exi = 1} else { oldperm = newperm; } } newmu1list<-NULL; newmu2list<-NULL; newmu3list<-NULL; newmu4list<-NULL; for( s in 1:newM){ newmu1list <- c(newmu1list, meanlist[ s, perm[newperm[s],1] ] ); newmu2list <- c(newmu2list, meanlist[ s, perm[newperm[s],2] ] ); newmu3list <- c(newmu3list, meanlist[ s, perm[newperm[s],3] ] ); newmu4list <- c(newmu4list, meanlist[ s, perm[newperm[s],4] ] ); } cat("mean of newmu1 is: " , mean(newmu1list) , "\n"); cat("mean of newmu2 is: " , mean(newmu2list) , "\n"); cat("mean of newmu3 is: " , mean(newmu3list) , "\n"); cat("mean of newmu4 is: " , mean(newmu4list) , "\n"); cat("mean of mu1 is: " , mean(mu1list) , "\n"); cat("mean of mu2 is: " , mean(mu2list) , "\n"); cat("mean of mu3 is: " , mean(mu3list) , "\n"); cat("mean of mu4 is: " , mean(mu4list) , "\n");