lab:projects:17bayesian_meta_analysis:r_code_for_estimation [Zhiyong Johnny Zhang]

#===================
## R code for the Meta-analysis
#===================
#install.packages("BMS") ## to calcuate f21hypergeometric function
library(BMS)
library(coda)#diagnostic of the chain

#log power Likelihood of sample correlation

logPowerLik=function(rho, N, r, a){
	## N sample size, >2
	## r sample correlation
	## rho random variable rho
	## a power
	I<-length(r)
	
	object<-0
	for (i in 1:I){
		object <- object + a[i]*( -.5*log(2*pi) 
		                          + log(N[i]-2) 
								  +0.5*(N[i]-4)*log(1-r[i]^2) 
								  + 0.5*(N[i]-1)*log(1-rho^2) 
								  + lgamma(N[i]-1) 
								  - lgamma(N[i]-.5) 
								  - (N[i]-1.5)*log(1-r[i]*rho) 
								  + log(f21hyper(.5, .5, N[i]-0.5, .5*(rho*r[i]+1))))
	}
	object
}


## maximimize the likelihood function
mlerho<-function(N, r, a, ...){
  ## ...: other parameters
  mle<-optimize(logPowerLik, c(-1,1), maximum=TRUE, N=N, r=r, a=a, ...)
  return(list(rho=mle$maximum, mle=mle))
}

## mlerho(N=N.sample, r=rho.sample, a=a.sample)

## 
boundaryrho<-function(N, r, a, interval=1e-4, ep=1e-6){
	theta<-seq(-1, 1, interval)
	exp.density<-exp(unlist(lapply(theta, logPowerLik,  N=N, r=r, a=a)))
	exp.density<-exp.density/max(exp.density)
	max.ind<-which.max(exp.density)
	min.ind.left<-which.min(abs(exp.density[1:max.ind]-ep))
	min.ind.right<-which.min(abs(exp.density[max.ind:length(exp.density)]-ep))+max.ind-1
	return(list(interval=c(theta[min.ind.left], theta[min.ind.right]), density=exp.density))
}

# test<-boundaryR(logPowerLik,  N=N.sample, r=rho.sample, a=a.sample)
# theta<-seq(-1,1,1e-4)
# plot(theta, test$density)
# abline(v=test$interval)

## the main function to do M-H
 metroprho <- function(N, r, a, nbi=1000, nmc=10000, init, ...){
	## fun: fun to be sampled from
	## nbi: burn-in period
	## nmc: length of markov chain after burning
	## ...: pass parameters for the likelihood function

  accept<-0  ## acceptance rate
  M<-nbi+nmc
  theta <- rep(0, M)
  mode<-mlerho(N, r, a)$rho
  if (missing(init)){  
    ## may set the initial value at the maximum of the likelihood function to avoid the burn-in
	theta[1] <- mode
  }else{
	theta[1] <- init
  }
  # find a good boundary
  ab <- boundaryrho(N=N, r=r, a=a, ...)
	
  for(t in 2:(M+1)){    
    thetagen = runif(1, ab$interval)  
    
    accept.r = min(1,exp(logPowerLik(rho=thetagen, N=N, r=r, a=a)-logPowerLik(rho=theta[t-1],N=N, r=r, a=a)))
    check = accept.r - runif(1)
    if(check > 0){
    	theta[t]=thetagen  
		accept <- accept + 1
	}
    else{ 
	  theta[t]=theta[t-1]
	}
  }
  object<-list(theta=theta[(nbi+1):M], mode=mode, accept=accept)
  class(object)<- 'rho'
  return(object)
}

## summary function and plot function
summary.rho<-function(object, alpha=.95, ...){
	rhomcmc<-as.mcmc(object$theta)
	effectN<-effectiveSize(rhomcmc)
	geweke<-geweke.diag(rhomcmc,...)
	hpd<-HPDinterval(rhomcmc)
	
	## print the output
	cat('The results are based on ', length(rhomcmc) , 'MCMC draws', ".\n")
	cat("The acceptance rate is ", object$accept/length(rhomcmc), ".\n")
	cat("The Geweke statistic for convergence test is ", geweke$z, ".\n")
	cat("The effective sample size is ", round(effectN), ".\n")
	cat("The mode of the posterior distribution is ", object$mode, ".\n")
	cat("The HPD interval is ", hpd, ".\n")
	
	invisible(list(mode=object$model, hpd=hpd, effectN=effectN, gewekw=geweke))
}

## plot the MCMC
plot.rho<-function(object, ...){
  ## 
  layout(matrix(c(1,2,3,3), 2, 2, byrow = TRUE))
  theta<-as.mcmc(object$theta)

  acf(theta, main='Autocorrelation plot',...)
  plot(density(theta),main='Density plot',...)
  abline(v=object$mode)
  plot.ts(theta,ylab='rho',main='Trace plot',...)  
}

## Cumulative analysis
cumrho<-function(N, r, a, hpd=FALSE, nbi=1000, nmc=10000, init, ...){
	m<-length(N)
	rho<-rep(0, m)
	
	if (hpd){
		hpd.ci<-array(NA, c(m,2))
		theta<-list()
		cat('Please wait while the program is running ')
		flush.console()
		for (i in 1:m){			
			temp.rho<-metroprho(N[1:i], r[1:i], a[1:i], nbi=nbi, nmc=nmc, ...)
			rho[i]<-temp.rho$mode
			hpd.ci[i, ]<-HPDinterval(as.mcmc(temp.rho$theta))
			theta[[i]]<-temp.rho
			cat(round(i/m*100),'% ')
			flush.console()
		}
		out<-cbind(rho, hpd.ci)
		colnames(out)<-c('rho','HPD', 'CI')
		rownames(out)<-1:m
		cat("\n\n")
		print(out)
		return(list(rho=rho, hpd=hpd.ci,theta=theta))
	}else{
		for (i in 1:m){
			rho[i]<-mlerho(N[1:i], r[1:i], a[1:i])$rho
		}
		return(rho=rho)
	}
}

## sensitive analysis of power a
arho<-function(N, r, a, id=length(N), m=11, ...){
	a.try<-seq(0,1,length=m)
	rho<-rep(0, m)
	for (i in 1:m){
		a[id]<-a.try[i]
		rho[i]<-mlerho(N=N, r=r, a=a)$rho
	}
	
	## plot rho according to a
	plot(a.try, rho, xlab='a', ylab='correlation')
	
	return(rho=rho)
}


mchains<-function(N, r, a, nbi=1000, nmc=10000, m=3, ...){
  mcmc<-mcmc.list()
  init<-seq(-1,1,length=m)
  accept<-mode<-rep(0,m)
  
  for (i in 1:m){
	temp<-metroprho(N=N, r=r, a=a, nbi=nbi, nmc=nmc, init[i], ...)
	mcmc[[i]]<-as.mcmc(temp$theta)
	accept[i]<-temp$accept
	mode[i]<-temp$mode
	cat("Finished chain ",i,"\n")
	flush.console()
  }
  return(list(mcmc=mcmc, accept=accept, mode=mode))
}

## An example
##data
rho.sample=c(0.3, 0.5, 0.8, 0.7)
N.sample=c(100, 120, 30, 150)
a.sample=c(1, 0.3, 0.2, 0.5)

## sampling
res1<-metroprho(N=N.sample, r=rho.sample, a=a.sample)
summary(res1)

## plot
plot(res1)

test1<-cumrho(N=N.sample, r=rho.sample, a=a.sample)
test2<-cumrho(N=N.sample, r=rho.sample, a=a.sample, hpd=TRUE)

## each cumulative one can be investigated, too
plot(test2$theta[[1]])
summary(test2$theta[[2]])

## sensitive analysis
arho(N=N.sample, r=rho.sample, a=a.sample)

## test multipe chain analysis
mcmc<-mchains(N=N.sample, r=rho.sample, a=a.sample)

gelman.diag(mcmc$mcmc)