#Here I declare all the libraries I need library(arm) library(mvtnorm) library(MCMCpack) # # # #Next I will define all the custom functions I need to run the sampler # # # #my function to index non-ordered values index.me <- function(v){ x <- sort(unique(v)) index <- rep(0,length(v)) level <- 1:length(x) for(i in 1:length(v)){ for (j in 1:length(x)){ if(v[i]==x[j]) index[i] <- j } } return(index)} #Here is the mulivariate hierarchical sampler. #betas is the previous estimate of betas #my.Sigma is the previous estimate of the vcov matrix #mle is the estimates to be used for the unit information prior #m is the number of groups mvnorm.hierarch.samp <- function(J,betas,my.Sigma,mle) { #This is the number of parameters p <- 2 #Here I will define all my priors eta0 <- p+2 L0 <- S0 <- cov(mle[,1:2]) iL0 <- solve(L0) mu0 <- apply(mle[,1:2],2,mean) #Here I sample for Theta Lm <- solve(iL0 + J*my.Sigma) mum <- Lm%*%(iL0%*%mu0 + my.Sigma%*%apply(betas,2,sum)) theta <- t(rmvnorm(1,mum,Lm)) #Here I sample for Sigma mtheta <- matrix(theta,J,p,byrow=TRUE) iSigma <- rwish(eta0+J,solve(S0+t(betas-mtheta)%*%(betas-mtheta))) return(list(theta = t(theta),iSigma = iSigma)) } #This function defines the normal model sampling #In this function J is the number of groups # g.id is the group identification #X is a design matrix #Y is the response variable #Sigma is the variance covariance matrix for thetas #Theta is the estimate of the hierarrchical parameters drawn from #the hierarchical distribution #It will return an output vector of length J mvnorm.samp <- function(J,g.id, X, Y, s2, Sigma,Theta){ out.vec <- matrix(NA,nrow=J,ncol=2) for(i in 1:J){ Vj<-solve(Sigma + t(X[g.id==i,])%*%X[g.id==i,]/s2[z] ) Ej<-Vj%*%(Sigma%*%t(t(Theta)) + t(X[g.id==i,])%*%Y[g.id==i]/s2[z] ) out.vec[i,]<-rmvnorm(1,Ej,Vj) } return(out.vec)} #this term samples the full variance for the whole model #its a bit tricky because it applies to the whole model #and requires that we've reassembled the estimates #m in this function is the total number of unique treatment, #and should be k * j (or 49 in my case), #g.id is the group id's and should be 1:(j*k) #Y is the response matrix and X is the design matrix #theta is the pooled estimate of all the individual factors sig.samp <- function(m, Y,X, g.id,Theta){ nu0 <-1 s20 <- .5 RSS<-0 for(i in 1:m) { RSS<-RSS+sum( (Y[g.id==i]-X[g.id==i,]%*%Theta[m,] )^2 ) } s2<-1/rgamma(1,(nu0+length(Y))/2, (nu0*s20+RSS)/2 ) return(s2)} #Here I read in all the data files chiro.log <- read.csv("chirolog.txt") w.means <- read.csv("wmeans.txt") w.cv <- read.csv("wcs.txt") #I need to add lines to strip the read files of row indexes w.means <- as.vector(w.means[,2]) w.cv <- as.vector(w.cv[,2]) chiro.log <- chiro.log[,2:17] chiro.log <- as.matrix(chiro.log) #First I will take the log differences of population size Y <- as.vector(apply(mos.log,1,diff)) #next I will create the design matrix X <- as.vector(t(mos.log[,1:15])) X <- exp(X) X <-cbind(rep(1,length(X)),X) #now I need to create the group id's I will #create id's for the individual j*k treatments #as well as the the j and k treatments indiv.id <- NULL for(i in 1:49){ indiv.id <- c(indiv.id, rep(i,15))} #id's for the water level CV cv.id <- NULL for(i in 1:49){ cv.id <- c(cv.id, rep(index.me(w.cv)[i],15))} #id's for the mean water level wl.id <- NULL for(i in 1:49){ wl.id <- c(wl.id, rep(index.me(w.means)[i],15))} #first I will get the mle estimates for the data mle.est <- matrix(NA,nrow=J*K,ncol=2) for(i in 1:49){ mle.est[i,] <- coef(lm(Y[indiv.id==i]~X[indiv.id==i,2]))} #the number of iterations niter <- 10000 J<- 7 K <- 7 #now I will create the data storage matrices #J is the number of groups wl.est <- array(NA,c(J,2,niter)) cv.est <- array(NA,c(K,2,niter)) indiv.est <- array(NA,c(K*J,2,niter)) #this is the final estimation, the sum of all the parts. h.theta.est <- array(NA,c(K*J,2,niter)) #the .c is for centered around the mean. wl.est.c <- array(NA,c(J,2,niter)) cv.est.c <- array(NA,c(K,2,niter)) indiv.est.c <- array(NA,c(K*J,2,niter)) #these matrices will hold the estimates of theta. wl.theta <- matrix(NA,nrow=niter,ncol=2) cv.theta <- matrix(NA,nrow=niter,ncol=2) indiv.theta <- matrix(NA,nrow=niter,ncol=2) #this is the overall model variance s2 <- vector() e.y <- array(NA,c(15,J*K,niter)) #these matrices hold the estimates of Sigma wl.Sigma <- array(NA,c(2,2,niter)) cv.Sigma <- array(NA,c(2,2,niter)) indiv.Sigma <- array(NA,c(2,2,niter)) #we need to provide initial starting points for each data point wl.est[,,1] <-cv.est [,,1]<- matrix(c(rnorm(J,.4,.1),rnorm(J,-.02,.005)),nrow=J,ncol=2,byrow=F) indiv.est[,,1]<-h.theta.est[,,1] <- matrix(c(rnorm(49,.4,.1),rnorm(49,-.06,.01)),nrow=J*K,ncol=2,byrow=F) wl.est.c[,,1] <- wl.est[,,1] - matrix(apply(wl.est[,,1],2,mean),nrow=J,ncol=2,byrow=TRUE) cv.est.c[,,1] <- cv.est[,,1] - matrix(apply(cv.est[,,1],2,mean),nrow=K,ncol=2,byrow=TRUE) indiv.est.c[,,1] <- indiv.est[,,1] - matrix(apply(indiv.est[,,1],2,mean),nrow=J*K,ncol=2,byrow=TRUE) wl.theta[1,] <- cv.theta[1,] <- indiv.theta[1,] <- c(.4,-.02) indiv.Sigma[,,1] <- solve(cov(mle.est)) wl.Sigma[,,1] <- solve(cov(cbind(tapply(mle.est[,1],index.me(w.means),mean),tapply(mle.est[,2],index.me(w.means),mean)))) cv.Sigma[,,1] <- solve(cov(cbind(tapply(mle.est[,1],index.me(w.cv),mean),tapply(mle.est[,2],index.me(w.cv),mean)))) s2[1] <- .1 for(z in 1:(niter-1)){ #first I will sample from from the water level effects wl.est[,,z+1] <- mvnorm.samp(J,wl.id,X,Y,s2,wl.Sigma[,,z],wl.theta[z,]) cv.est[,,z+1] <- mvnorm.samp(K,cv.id,X,Y,s2,cv.Sigma[,,z],cv.theta[z,]) indiv.est[,,z+1] <- mvnorm.samp(J*K,indiv.id,X,Y,s2,indiv.Sigma[,,z],indiv.theta[z,]) #now i'll center all my draws wl.est.c[,,z+1] <- wl.est[,,z+1] - matrix(apply(wl.est[,,z+1],2,mean),nrow=J,ncol=2,byrow=TRUE) cv.est.c[,,z+1] <- cv.est[,,z+1] - matrix(apply(cv.est[,,z+1],2,mean),nrow=J,ncol=2,byrow=TRUE) indiv.est.c[,,z+1] <- indiv.est[,,z+1] - matrix(apply(indiv.est[,,z+1],2,mean),nrow=J*K,ncol=2,byrow=TRUE) #now I the estimates of theta and Sigma for each factor wl.samp <- mvnorm.hierarch.samp(J,wl.est[,,z],wl.Sigma[,,z],mle.est) wl.theta[z+1,] <- wl.samp$theta wl.Sigma[,,z+1] <- wl.samp$iSigma cv.samp <- mvnorm.hierarch.samp(J,cv.est[,,z],cv.Sigma[,,z],mle.est) cv.theta[z+1,] <- cv.samp$theta cv.Sigma[,,z+1] <- cv.samp$iSigma indiv.samp <- mvnorm.hierarch.samp(K*J,indiv.est[,,z],indiv.Sigma[,,z],mle.est) indiv.theta[z+1,] <- indiv.samp$theta indiv.Sigma[,,z+1] <- indiv.samp$iSigma #debug(mvnorm.hierarch.samp) #undebug(mvnorm.hierarch.samp) for(i in 1:49){ h.theta.est[i,,z+1] <- wl.theta[z+1,] + wl.est.c[index.me(w.means)[i],,z+1] + cv.est.c[index.me(w.cv)[i],,z+1] + indiv.est.c[i,,z+1] } plot(0,0,main=z) s2[z+1] <- sig.samp(J*K,Y,X,indiv.id,h.theta.est[,,z]) #Finally we can calculate some of the RSS' allowing me to calculate R^2 and other values #for(i in 1:(J*K)){ #e.y[,i,z] <- Y[indiv.id==i]- X[indiv.id==i,]%*%h.theta.est[i,,z] #} } #Now I will store all my data from a given run. mos.wl.est <- wl.est mos.cv.est <- cv.est mos.indiv.est <- indiv.est #this is the final estimation, the sum of all the parts. mos.h.theta.est <- h.theta.est #the .c is for centered around the mean. mos.wl.est.c <- wl.est.c mos.cv.est.c <- cv.est.c mos.indiv.est.c <- indiv.est.c #these matrices will hold the estimates of theta. mos.wl.theta <- wl.theta mos.cv.theta <- cv.theta mos.indiv.theta <- indiv.theta #this is the overall model variance mos.s2 <- s2 #these matrices hold the estimates of Sigma mos.wl.Sigma <- wl.Sigma mos.cv.Sigma <- cv.Sigma mos.indiv.Sigma <- indiv.Sigma solve(reconst.Sigma(chiro.indiv.Sigma)) plot(s2) plot(wl.theta) t(apply(h.theta.est[,2,],1,quantile,c(.25,.975))) h.theta.est2 <- h.theta.est plot(apply(h.theta.est[,2,],1,mean),apply(h.theta.est2[,2,] ,1,mean)) dim(wl.est.c) t(apply(wl.est.c[,2,],1,quantile,c(.025,.975))) t(apply(cv.est.c[,2,],1,quantile,c(.025,.975))) plot(apply(indiv.est.c[,2,],1,mean),ylim=c(-.1,.1)) for(i in 1:(J*K)){ lines(rep(i,2),t(apply(indiv.est.c[,2,],1,quantile,c(.025,.975)))[i,]) } abline(0,0) plot(Y~X[,2]) for(i in 1:J){ abline(apply(wl.est[i,,],1,mean),col=i) } apply(wl.est dim(h.theta.est[,1,]) plot(wl.theta) f.out.mat <-NULL for(z in 1:999){ out.mat <- NULL for(i in 1:49){ out.mat <- rbind(out.mat,t(t(e.y[,i,z]))) } f.out.mat <- cbind(f.out.mat,out.mat) }