################################################################ # # Function 2. A function to generate theta_i for the cluster # structure # # Tasks: Compute probabilities of joining cluster, beginning # new cluster # Generate cluster membership # Update s and n_i # Update k and theta_star if needed # ################################################################ fn.gen.theta.i.2 <- function(i,clust,prior,mu,data) { n.i <- clust$n.i theta.star <- clust$theta.star k <- clust$k s <- clust$s sig.sq <- prior$sig.sq tau.sq <- prior$tau.sq M <- prior$M x <- data$x ############################# # prb <- c(n.i,M) # for (j in 1:k) # { # tmp.m <- theta.star[j] # tmp.v <- sig.sq # prb[j] <- prb[j] * dnorm(x[i],mean=tmp.m,sd=sqrt(tmp.v)) # } # prb[k + 1] <- prb[k + 1] * # dnorm(x[i], mean=mu, sd=sqrt(tau.sq + sig.sq)) #### replacement follows #### prb <- n.i * dnorm(x[i],mean=theta.star,sd=sig.sq) prb <- c(prb,M * dnorm(x[i],mean=mu,sd=sqrt(tau.sq + sig.sq))) ############################# tmp <- sample(1:(k+1),size=1,prob=prb) if (tmp > k) { s[i] <- tmp k <- k + 1 n.i <- c(n.i,1) tmp.m <- ((1/sig.sq)*x[i] + (1/tau.sq)*mu) / ((1/sig.sq) + (1/tau.sq)) tmp.v <- 1/((1/sig.sq) + (1/tau.sq)) tmp <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v)) theta.star <- c(theta.star,tmp) } else { s[i] <- tmp n.i[tmp] <- n.i[tmp] + 1 } clust$k <- k clust$n.i <- n.i clust$s <- s clust$theta.star <- theta.star return(clust) } ################################################################ # # Function 3. A function to generate theta_star # # Tasks: Loop through i = 1, ..., k # Find cond'l posterior distribution for theta_star[i] # Generate theta_star[i] # ################################################################ fn.gen.theta.star.2 <- function(clust,prior,mu,data) { k <- clust$k n.i <- clust$n.i s <- clust$s theta.star <- clust$theta.star tau.sq <- prior$tau.sq sig.sq <- prior$sig.sq x <- data$x ############################# # for (i in 1:k) # { # tmp.m <- ((n.i[i]/sig.sq)*mean(x[s==i]) + # (1/tau.sq)*mu) / # ((n.i[i]/sig.sq) + (1/tau.sq)) # tmp.v <- 1/((n.i[i]/sig.sq) + (1/tau.sq)) # theta.star[i] <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v)) # } ### replacement for above ### tmp.m <- rep(0,k) for (i in 1:k) { tmp.m[i] <- ((n.i[i]/sig.sq)*mean(x[s==i]) + (1/tau.sq)*mu) / ((n.i[i]/sig.sq) + (1/tau.sq)) } tmp.v <- 1/((n.i/sig.sq) + (1/tau.sq)) theta.star <- rnorm(n=k,mean=tmp.m,sd=sqrt(tmp.v)) ############################# clust$theta.star <- theta.star return(clust) } ################################################################ # # Function 4. A function to generate mu # # Tasks: Find cond'l posterior distribution for mu # Generate mu # ################################################################ fn.gen.mu.2 <- function(clust,prior) { k <- clust$k theta.star <- clust$theta.star tau.sq <- prior$tau.sq rho.sq <- prior$rho.sq mu.0 <- prior$mu.0 tmp.m <- ((k/tau.sq)*mean(theta.star) + (1 / rho.sq) * mu.0) / ((k/tau.sq) + (1/rho.sq)) tmp.v <- 1 / ((k/tau.sq) + (1/rho.sq)) mu <- rnorm(n=1,mean=tmp.m,sd=sqrt(tmp.v)) return(mu) } ################################################################ # # Function 5. One iterate of the Gibbs sampler # # Tasks: Generate each theta_i in turn # Generate theta_star # Generate mu # ################################################################ fn.one.iterate.2 <- function(clust,prior,mu,data) { for (i in 1:data$n) { clust <- fn.remove.theta.i(i,clust) clust <- fn.gen.theta.i.2(i,clust,prior,mu,data) } clust <- fn.gen.theta.star.2(clust,prior,mu,data) mu <- fn.gen.mu.2(clust,prior) ret.obj <- NULL ret.obj$clust <- clust ret.obj$prior <- prior ret.obj$mu <- mu return(ret.obj) } ################################################################ # # Function 6. A brief Gibbs sampler # # Tasks: Set up object (here, matrix) to store results # Run one iterate of Gibbs sampler # Tally results # # Improvements for you to make: # Burn-in -- allow explicit description of burn-in to be # discarded # Subsampling -- Not to be done unless storage issues are # important. But, allow subsampling of the # output # Initialization -- an automated initialization routine. # Best to allow a couple of options for # the initialization. # ################################################################ fn.gibbs.sampler.2 <- function(n.reps,prior,data,clust,mu) { # Insert initialization routine if desired # Insert burn-in period if desired res.mat <- matrix(rep(0,n.reps*(data$n+1)),nrow=n.reps) for (i in 1:n.reps) { tmp <- fn.one.iterate.2(clust,prior,mu,data) clust <- tmp$clust mu <- tmp$mu res.mat[i,] <- c(mu,clust$theta.star[clust$s]) # print(clust$k) } return(res.mat) } # create appropriate data object data1 <- NULL data1$x <- bodyfat.mat[,4] data1$n <- length(data1$x) prior1 <- NULL prior1$mu.0 <- 180 prior1$rho.sq <- 400 prior1$M <- 20 prior1$tau.sq <- 225 prior1$sig.sq <- 100 clust1 <- NULL clust1$k <- data1$n clust1$s <- 1:data1$n clust1$n.i <- rep(1,data1$n) clust1$theta.star <- data1$x mu1 <- prior1$mu.0 n.reps <- 1000 date() system.time( res.mat.2.nm <- fn.gibbs.sampler.2.nomix(n.reps,prior1,data1,clust1,mu1) ,) date() ################################################################ # # Simple objects and functions that let one check the speed for # various data structures and calls. Some of these follow # functions from Mario Peruggia. # ################################################################ e1 <- NULL e1$a <- 1 e1$b <- 2 e2 <- c(1,2) e3 <- NULL e3$a <- c(1,2) f1 <- function(e1) { a <- e1$a for (i in 1:1000000) {a <- a + 1} e1$a <- a return(e1) } f2 <- function(e1) { for (i in 1:1000000) {e1$a <- e1$a + 1} return(e1) } f3 <- function(e2) { for (i in 1:1000000) {e2[1] <- e2[1] + 1} return(e2) } f4 <- function(e3) { for (i in 1:1000000) {e3$a[1] <- e3$a[1] + 1} return(e3) } f5 <- function(e2) { a <- e2[1] for (i in 1:1000000) {a <- a + 1} e2[1] <- a return(e2) } f6 <- function(e2) { a <- e2 for (i in 1:1000000) {a[1] <- a[1] + 1} return(a) } f7 <- function(e1) { for (i in 1:1000000) {e1$b <- e1$b + 1} return(e1) } system.time(f1(e1)) system.time(f2(e1)) system.time(f3(e2)) system.time(f4(e3)) system.time(f5(e2)) system.time(f6(e2)) system.time(f7(e1))