codes:modified_basic_mdp_r_code [Zhiyong Johnny Zhang]


################################################################
# 
# Function 2.  A function to generate theta_i for the cluster 
#              structure
#
# Tasks:  Compute probabilities of joining cluster, beginning
#           new cluster
#         Generate cluster membership
#         Update s and n_i
#         Update k and theta_star if needed
#
################################################################

fn.gen.theta.i.2 <- function(i,clust,prior,mu,data)
{
 n.i <- clust$n.i
 theta.star <- clust$theta.star
 k <- clust$k
 s <- clust$s
 sig.sq <- prior$sig.sq
 tau.sq <- prior$tau.sq
 M <- prior$M
 x <- data$x
#############################
# prb <- c(n.i,M)
# for (j in 1:k)
#  {
#   tmp.m <- theta.star[j]
#   tmp.v <- sig.sq
#   prb[j] <- prb[j] * dnorm(x[i],mean=tmp.m,sd=sqrt(tmp.v))
#  }
# prb[k + 1] <- prb[k + 1] * 
#     dnorm(x[i], mean=mu, sd=sqrt(tau.sq + sig.sq))
#### replacement follows ####
prb <- n.i * dnorm(x[i],mean=theta.star,sd=sig.sq)
prb <- c(prb,M * dnorm(x[i],mean=mu,sd=sqrt(tau.sq + sig.sq)))
#############################

 tmp <- sample(1:(k+1),size=1,prob=prb)

 if (tmp > k)
  {
   s[i] <- tmp
   k <- k + 1
   n.i <- c(n.i,1)
   tmp.m <- ((1/sig.sq)*x[i] + (1/tau.sq)*mu) / 
            ((1/sig.sq) + (1/tau.sq))
   tmp.v <- 1/((1/sig.sq) + (1/tau.sq))
   tmp <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v))
   theta.star <- c(theta.star,tmp)
  }
 else
  {
   s[i] <- tmp
   n.i[tmp] <- n.i[tmp] + 1
  }

 clust$k <- k
 clust$n.i <- n.i
 clust$s <- s
 clust$theta.star <- theta.star

 return(clust)
}


################################################################
# 
# Function 3.  A function to generate theta_star 
#
# Tasks:  Loop through i = 1, ..., k
#         Find cond'l posterior distribution for theta_star[i]
#         Generate theta_star[i]
#
################################################################

fn.gen.theta.star.2 <- function(clust,prior,mu,data)
{
 k <- clust$k
 n.i <- clust$n.i
 s <- clust$s
 theta.star <- clust$theta.star
 tau.sq <- prior$tau.sq
 sig.sq <- prior$sig.sq
 x <- data$x

#############################
# for (i in 1:k)
#  {
#   tmp.m <- ((n.i[i]/sig.sq)*mean(x[s==i]) + 
#             (1/tau.sq)*mu) / 
#            ((n.i[i]/sig.sq) + (1/tau.sq))
#   tmp.v <- 1/((n.i[i]/sig.sq) + (1/tau.sq))
#   theta.star[i] <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v))
#  }
### replacement for above ###
tmp.m <- rep(0,k)
for (i in 1:k) 
 {
  tmp.m[i] <- ((n.i[i]/sig.sq)*mean(x[s==i]) + 
             (1/tau.sq)*mu) / 
             ((n.i[i]/sig.sq) + (1/tau.sq))
 }
tmp.v <- 1/((n.i/sig.sq) + (1/tau.sq))
theta.star <- rnorm(n=k,mean=tmp.m,sd=sqrt(tmp.v))
#############################

 clust$theta.star <- theta.star
 return(clust)
}


################################################################
# 
# Function 4.  A function to generate mu 
#
# Tasks:  Find cond'l posterior distribution for mu
#         Generate mu
#
################################################################

fn.gen.mu.2 <- function(clust,prior)
{
 k <- clust$k
 theta.star <- clust$theta.star
 tau.sq <- prior$tau.sq
 rho.sq <- prior$rho.sq
 mu.0 <- prior$mu.0
 
 tmp.m <- ((k/tau.sq)*mean(theta.star) +
           (1 / rho.sq) * mu.0) / 
          ((k/tau.sq) + (1/rho.sq))
 tmp.v <- 1 / ((k/tau.sq) + (1/rho.sq))
 mu <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v))
 
 return(mu)
}


################################################################
# 
# Function 5.  One iterate of the Gibbs sampler 
#
# Tasks:  Generate each theta_i in turn
#         Generate theta_star
#         Generate mu
#
################################################################

fn.one.iterate.2 <- function(clust,prior,mu,data)
{
 for (i in 1:data$n)
  {
   clust <- fn.remove.theta.i(i,clust)
   clust <- fn.gen.theta.i.2(i,clust,prior,mu,data)
  }
 clust <- fn.gen.theta.star.2(clust,prior,mu,data)
 mu <- fn.gen.mu.2(clust,prior)

 ret.obj <- NULL
 ret.obj$clust <- clust
 ret.obj$prior <- prior
 ret.obj$mu <- mu

 return(ret.obj)
}



################################################################
# 
# Function 6.  A brief Gibbs sampler 
#
# Tasks:  Set up object (here, matrix) to store results
#         Run one iterate of Gibbs sampler
#         Tally results
#
# Improvements for you to make:
#   Burn-in -- allow explicit description of burn-in to be
#              discarded
#   Subsampling -- Not to be done unless storage issues are
#                  important.  But, allow subsampling of the
#                  output  
#   Initialization -- an automated initialization routine.  
#                     Best to allow a couple of options for
#                     the initialization.  
#
################################################################

fn.gibbs.sampler.2 <- function(n.reps,prior,data,clust,mu)
{
 # Insert initialization routine if desired
 # Insert burn-in period if desired
 res.mat <- matrix(rep(0,n.reps*(data$n+1)),nrow=n.reps)

 for (i in 1:n.reps)
  {
   tmp <- fn.one.iterate.2(clust,prior,mu,data)
   clust <- tmp$clust
   mu <- tmp$mu
   res.mat[i,] <- c(mu,clust$theta.star[clust$s])
# print(clust$k)
  }

 return(res.mat)
}






# create appropriate data object

data1 <- NULL
data1$x <- bodyfat.mat[,4]
data1$n <- length(data1$x)

prior1 <- NULL
prior1$mu.0 <- 180
prior1$rho.sq <- 400
prior1$M <- 20
prior1$tau.sq <- 225
prior1$sig.sq <- 100

clust1 <- NULL
clust1$k <- data1$n
clust1$s <- 1:data1$n
clust1$n.i <- rep(1,data1$n)
clust1$theta.star <- data1$x

mu1 <- prior1$mu.0

n.reps <- 1000
date()
system.time(
res.mat.2.nm <- fn.gibbs.sampler.2.nomix(n.reps,prior1,data1,clust1,mu1)
,)
date()


################################################################
#
# Simple objects and functions that let one check the speed for
#  various data structures and calls.  Some of these follow 
#  functions from Mario Peruggia.   
#
################################################################


e1 <- NULL
e1$a <- 1
e1$b <- 2
e2 <- c(1,2)
e3 <- NULL
e3$a <- c(1,2)

f1 <- function(e1)
{
 a <- e1$a
 for (i in 1:1000000) {a <- a + 1}

 e1$a <- a
 return(e1)
}

f2 <- function(e1)
{
 for (i in 1:1000000) {e1$a <- e1$a + 1}
 return(e1)
}

f3 <- function(e2)
{
 for (i in 1:1000000) {e2[1] <- e2[1] + 1}
 return(e2)
}

f4 <- function(e3)
{
 for (i in 1:1000000) {e3$a[1] <- e3$a[1] + 1}
 return(e3)
}

f5 <- function(e2)
{
 a <- e2[1]
 for (i in 1:1000000) {a <- a + 1}

 e2[1] <- a
 return(e2)
}

f6 <- function(e2)
{
 a <- e2
 for (i in 1:1000000) {a[1] <- a[1] + 1}

 return(a)
}

f7 <- function(e1)
{
 for (i in 1:1000000) {e1$b <- e1$b + 1}
 return(e1)
}


system.time(f1(e1))
system.time(f2(e1))
system.time(f3(e2))
system.time(f4(e3))
system.time(f5(e2))
system.time(f6(e2))
system.time(f7(e1))