# Modification of MDP code to study mixing at a more basic level

# First, a Gibbs sampler without the remixing step.  If this sampler
# is run with a large value of M (so that there are nearly as many
# clusters as observations in the data set), mixing is nearly as good
# as with the sampler with the remixing step.  If the value of M
# is relatively small, there are lots of "repeats", as clusters don't
# often move.  This behavior is not uniform across the cases in the
# data set, but depends on the local properties of data and prior.
# It is often very different for observations in the middle of the
# data set and those at the extremes.

# The functions below make use of the quicker code in functions that
# end with ".2".  These are available in the modified MDP code.

################################################################
#
# Function 5.  One iterate of the Gibbs sampler
#
# Tasks:  Generate each theta_i in turn
#         Generate theta_star
#         Generate mu
#
################################################################

fn.one.iterate.2.nomix <- function(clust,prior,mu,data)
{
  for (i in 1:data$n)
  {
    clust <- fn.remove.theta.i(i,clust)
    clust <- fn.gen.theta.i.2(i,clust,prior,mu,data)
  }
# The remixing step is commented out in this version of the sampler
#  clust <- fn.gen.theta.star.2(clust,prior,mu,data)
  mu <- fn.gen.mu.2(clust,prior)

  ret.obj <- NULL
  ret.obj$clust <- clust
  ret.obj$prior <- prior
  ret.obj$mu <- mu
  return(ret.obj)
}

################################################################
#
# Function 6.  A brief Gibbs sampler
#
# Tasks:  Set up object (here, matrix) to store results
#         Run one iterate of Gibbs sampler
#         Tally results
#
# Improvements for you to make:
#   Burn-in -- allow explicit description of burn-in to be
#              discarded
#   Subsampling -- Not to be done unless storage issues are
#                  important.  But, allow subsampling of the
#                  output
#   Initialization -- an automated initialization routine.
#                     Best to allow a couple of options for
#                     the initialization.
#
################################################################

fn.gibbs.sampler.2.nomix <- function(n.reps,prior,data,clust,mu)
{
# Insert initialization routine if desired
# Insert burn-in period if desired

  res.mat <- matrix(rep(0,n.reps*(data$n+1)),nrow=n.reps)
  for (i in 1:n.reps)
  {
    tmp <- fn.one.iterate.2.nomix(clust,prior,mu,data)
    clust <- tmp$clust
    mu <- tmp$mu
    res.mat[i,] <- c(mu,clust$theta.star[clust$s])
#    print(clust$k)
  }
  return(res.mat)
}

# create appropriate data object

data1 <- NULL
data1$x <- bodyfat.mat[,4]
data1$n <- length(data1$x)

prior1 <- NULL
prior1$mu.0 <- 180
prior1$rho.sq <- 400
prior1$M <- 1          # for this data set, use M=20 for a
                       # large value, M=1 for a small value
prior1$tau.sq <- 225
prior1$sig.sq <- 100

clust1 <- NULL
clust1$k <- data1$n
clust1$s <- 1:data1$n
clust1$n.i <- rep(1,data1$n)
clust1$theta.star <- data1$x

mu1 <- prior1$mu.0

n.reps <- 2000

date()
system.time(
res.mat.2.nm <- fn.gibbs.sampler.2.nomix(n.reps,prior1,data1,clust1,mu1)
)
date()

par(mfrow=c(2,1))

# The choice of individual 82 (position 83, since mu is stored in
# position 1) is arbitrary.
length(unique(res.mat.2.nm[,83]))
plot(res.mat.2.nm[,83])
plot(density(res.mat.2.nm[,83]))

date()
system.time(
res.mat.2 <- fn.gibbs.sampler.2(n.reps,prior1,data1,clust1,mu1)
)
date()

length(unique(res.mat.2[,83]))
plot(res.mat.2[,83])
plot(density(res.mat.2[,83]))

acf(res.mat.2.nm[,83])
acf(res.mat.2[,83])

# Note--the long, horizontal black blobs in the "no-mixing" plot
# indicate poor mixing.  Also note the density plots.  The
# auto-correlation plots tell an interesting tale.
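
# An optional, rough diagnostic (an aside, not part of the original
# code): turn the autocorrelations into a crude effective sample
# size, so that mixing with and without the remixing step can be
# compared with a single number per chain.  The function name fn.ess
# and the cutoff at the first non-positive autocorrelation are my
# own choices -- a common, if crude, convention.

fn.ess <- function(x, lag.max=100)
{
  rho <- acf(x, lag.max=lag.max, plot=FALSE)$acf[-1]
# keep lags up to (not including) the first non-positive autocorrelation
  cut <- which(rho <= 0)
  if (length(cut) > 0) rho <- rho[seq_len(cut[1]-1)]
  length(x) / (1 + 2*sum(rho))
}

fn.ess(res.mat.2.nm[,83])   # typically far smaller ...
fn.ess(res.mat.2[,83])      # ... than with the remixing step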
# Next, a function that lets us make use of a "denoised" version
# of theta_i.  This denoised version replaces the generated value
# of theta_i with the mean of the appropriate theta.star value (just
# before its generation).  While these "denoised" theta_i would not
# be used to get estimates of the posterior density of theta_i or
# its variance, they can be used for "Rao-Blackwellized" estimates
# of the posterior mean of theta_i.  They also smooth the appearance
# of the time plot for the theta_i.

################################################################
#
# Function 3.  A function to generate theta_star
#
# Tasks:  Loop through i = 1, ..., k
#         Find cond'l posterior distribution for theta_star[i]
#         Generate theta_star[i]
#
################################################################

fn.gen.theta.star.3 <- function(clust,prior,mu,data)
{
  k <- clust$k
  n.i <- clust$n.i
  s <- clust$s
  theta.star <- clust$theta.star
  tau.sq <- prior$tau.sq
  sig.sq <- prior$sig.sq
  x <- data$x

#############################
# The original, loop-based generation of theta.star:
#  for (i in 1:k)
#  {
#    tmp.m <- ((n.i[i]/sig.sq)*mean(x[s==i]) +
#              (1/tau.sq)*mu) /
#             ((n.i[i]/sig.sq) + (1/tau.sq))
#    tmp.v <- 1/((n.i[i]/sig.sq) + (1/tau.sq))
#    theta.star[i] <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v))
#  }

### replacement for above ###
  tmp.m <- rep(0,k)
  for (i in 1:k)
  {
    tmp.m[i] <- ((n.i[i]/sig.sq)*mean(x[s==i]) +
                 (1/tau.sq)*mu) /
                ((n.i[i]/sig.sq) + (1/tau.sq))
  }
  tmp.v <- 1/((n.i/sig.sq) + (1/tau.sq))
  theta.star <- rnorm(n=k,mean=tmp.m,sd=sqrt(tmp.v))
#############################

  clust$theta.star <- theta.star
# The single line below passes the conditional means of the
# theta.star out with the cluster object
  clust$theta.star.mean <- tmp.m
  return(clust)
}

################################################################
#
# Function 5.  One iterate of the Gibbs sampler
#
# Tasks:  Generate each theta_i in turn
#         Generate theta_star
#         Generate mu
#
################################################################

fn.one.iterate.3 <- function(clust,prior,mu,data)
{
  for (i in 1:data$n)
  {
    clust <- fn.remove.theta.i(i,clust)
    clust <- fn.gen.theta.i.2(i,clust,prior,mu,data)
  }
  clust <- fn.gen.theta.star.3(clust,prior,mu,data)
  mu <- fn.gen.mu.2(clust,prior)

  ret.obj <- NULL
  ret.obj$clust <- clust
  ret.obj$prior <- prior
  ret.obj$mu <- mu
  return(ret.obj)
}
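
# A quick, self-contained toy illustration (an aside, not part of
# the MDP code) of why the "denoised" values have smaller variance.
# Draw conditional means, then add the conditional noise to get raw
# draws; by the law of total variance the conditional means must
# vary less, while both are unbiased for the overall mean.  All
# names and numbers here are made up for the illustration.

set.seed(42)
toy.m <- rnorm(10000, mean=180, sd=5)        # conditional means across iterations
toy.theta <- rnorm(10000, mean=toy.m, sd=3)  # raw draws add conditional noise
var(toy.theta)   # roughly 25 + 9 = 34
var(toy.m)       # roughly 25 -- the denoised values vary less
mean(toy.theta)  # both estimate the same mean ...
mean(toy.m)      # ... but the Rao-Blackwellized version is less variable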
################################################################
#
# Function 6.  A brief Gibbs sampler
#
# Tasks:  Set up object (here, matrix) to store results
#         Run one iterate of Gibbs sampler
#         Tally results
#
# Improvements for you to make:
#   Burn-in -- allow explicit description of burn-in to be
#              discarded
#   Subsampling -- Not to be done unless storage issues are
#                  important.  But, allow subsampling of the
#                  output
#   Initialization -- an automated initialization routine.
#                     Best to allow a couple of options for
#                     the initialization.
#
################################################################

fn.gibbs.sampler.3 <- function(n.reps,prior,data,clust,mu)
{
# Insert initialization routine if desired
# Insert burn-in period if desired

  res.mat <- matrix(rep(0,n.reps*(2*data$n+1)),nrow=n.reps)
  for (i in 1:n.reps)
  {
    tmp <- fn.one.iterate.3(clust,prior,mu,data)
    clust <- tmp$clust
    mu <- tmp$mu
    res.mat[i,] <- c(mu,clust$theta.star[clust$s],
                     clust$theta.star.mean[clust$s])
#    print(clust$k)
  }
  return(res.mat)
}

# create appropriate data object

data1 <- NULL
data1$x <- bodyfat.mat[,4]
data1$n <- length(data1$x)

prior1 <- NULL
prior1$mu.0 <- 180
prior1$rho.sq <- 400
prior1$M <- 20
prior1$tau.sq <- 225
prior1$sig.sq <- 100

clust1 <- NULL
clust1$k <- data1$n
clust1$s <- 1:data1$n
clust1$n.i <- rep(1,data1$n)
clust1$theta.star <- data1$x

mu1 <- prior1$mu.0

# A run of the code, with 5000 replicates.  After the code has
# been run, a comparison of the variances of the stored values.
# Note that the variance of the estimator is not this value
# divided by n.reps.  An easy way to get a good estimate of the
# variation in the estimator of theta.i is the batch means method.
# See the brief function below.

n.reps <- 5000

date()
system.time(
res.mat.2 <- fn.gibbs.sampler.2(n.reps,prior1,data1,clust1,mu1)
)
date()
system.time(
res.mat.3 <- fn.gibbs.sampler.3(n.reps,prior1,data1,clust1,mu1)
)
date()

# Columns 2:253 of res.mat.3 hold the theta_i draws and columns
# 254:505 the denoised versions (here 252 = data1$n)
var.mat <- matrix(rep(0,252*3),ncol=3)
for (i in 1:252)
{
  var.mat[i,1] <- var(res.mat.2[,1+i])
  var.mat[i,2] <- var(res.mat.3[,1+i])
  var.mat[i,3] <- var(res.mat.3[,253+i])
}
apply(var.mat,2,mean)
t.test(var.mat[,1] - var.mat[,2])
t.test(var.mat[,1] - var.mat[,3])
t.test(var.mat[,2] - var.mat[,3])

fn.batch.se <- function(x,batchsize=100)
{
  num.mns <- length(x)/batchsize
  tmp.mns <- rep(0,num.mns)
  for (i in 1:num.mns)
  {
    tmp.mns[i] <- mean(x[((i-1)*batchsize+1):(i*batchsize)])
  }
  batch.var <- var(tmp.mns)/num.mns
  return(sqrt(batch.var))
}

mean(apply(res.mat.2[,2:253],2,fn.batch.se))
mean(apply(res.mat.3[,2:253],2,fn.batch.se))
mean(apply(res.mat.3[,254:505],2,fn.batch.se))

par(mfrow=c(2,1))

# posterior mean against posterior variance for each theta_i
plot(apply(res.mat.3[,2:253],2,mean),apply(res.mat.3[,2:253],2,var))

# batch means standard errors: denoised theta_i on the horizontal
# axis, raw theta_i on the vertical axis, with a y=x reference line
plot(apply(res.mat.3[,254:505],2,fn.batch.se),
     apply(res.mat.3[,2:253],2,fn.batch.se))
lines(apply(res.mat.3[,254:505],2,fn.batch.se),
      apply(res.mat.3[,254:505],2,fn.batch.se))
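
# An optional check (an aside, not in the original code): the batch
# means standard error depends on the batch size, and batches that
# are too small understate it when autocorrelation is strong.
# Recomputing fn.batch.se over a few batch sizes for one arbitrary
# coordinate shows whether the estimate has stabilized; the batch
# sizes below evenly divide n.reps so no draws are discarded.

for (b in c(50,100,250,500))
{
  cat("batchsize", b, ": se =",
      fn.batch.se(res.mat.3[,83], batchsize=b), "\n")
}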