## bkr.r # Bayesian Kernel Regression with Guassian Kernels # Using the collapse MCMC algorithm # Do not use the Dirichlet layer ##Load the shared library to calculate design matrix if(length(grep("x86_64", sessionInfo()$R.ver$system))==1){ dyn.load("kernelCalculation64.so"); }else{ dyn.load("kernelCalculation.so"); } ##Source common distributions and matrix actions source("bkrCommon.r"); ##getdesign() # Use cpp code in the shared library to calculate the kernel design matrix # get the design matrix for those nvec>0 columns only getdesign <- function(dMat, # n*d, data matrix cMat, # p*d, center matrix theta # list(L, nvec, ...) ){ nvec <- theta$nvec; cMat1 <- rbind(1, cMat); isi <- rep(0, sum(nvec>0)); if(nvec[1] > 0){ isi[1] <- 1; } z <- .C("getDesignCpp", # just use the exponent as.double(dMat), # n*d, data matrix vector as.double(cMat1[nvec>0,]), # p'*d, center matrix vector as.double(theta$L), # d*1, kernel vector as.integer(isi), # p'*1, indicator of intercept as.integer(dim(dMat)[1]), # n, number of observations as.integer(length(isi)), # p, number of kernels (model dimension) as.integer(dim(dMat)[2]), # d, observation dimension z = double(dim(dMat)[1]*length(isi)))$z; # n*p' design matrix z <- matrix(z, nc=length(isi)); return(z); } ##getfulldesign() # Use cpp code in the shared library to calculate the kernel design matrix # - get the full design matrix for intercept and all cMat columns getfulldesign <- function(dMat, # data matrix cMat, # center matrix theta # list(L, ...) ){ nvec <- theta$nvec; cMat1 <- rbind(1, cMat); isi <- c(1, rep(0, dim(cMat)[1])); z <- .C("getDesignCpp", # just use the exponent as.double(dMat), # n*d, data matrix vector as.double(cMat1), # (p+1)*d, center matrix vector as.double(theta$L), # d*1, kernel vector as.integer(isi), # (p+1)*1, indicator of intercept as.integer(dim(dMat)[1]), # n, number of observations as.integer(length(isi)), # p, number of kernels (model dimension) as.integer(dim(dMat)[2]), # d, observation dimension z = double(dim(dMat)[1]*length(isi)))$z; # n*p' design matrix z <- matrix(z, nc=length(isi)); return(z); } ##getmeanJ() # calculate meanJ from the approximated alpha stable process getmeanJ <- function(alpha, eps, gam ){ if(alpha <= 0 | alpha >= 2){ stop("ERROR: stable index alpha is not in (0, 2).") }else if(alpha == 1){ meanJ <- gam/eps; }else{ meanJ <- (gam*alpha^(1-alpha/2)/eps^alpha*sqrt(pi))* (gamma(alpha)*gamma(alpha/2)/gamma((alpha+1)/2))*sin(pi*alpha/2); } return(meanJ); } ##llike() # log likelihood calculation # marginalized beta in the conjugate normal-gamma setting # for binary model, likelihood conditional on hidden z values llike <- function(y, # continuous or 0/1 response X, # n*d covariate matrix theta, # list(p, nvec, varphi, beta, L, phi) modeltype, # 0/1, normal linear/binary probit fullXX=NULL # precalculated XX matrix ){ if(modeltype == 1){ y <- theta$z; theta$phi <- 1; } if(is.null(fullXX)){ XX <- getdesign(X, X, theta); }else{ XX <- matrix(fullXX[, theta$nvec>0], nc=sum(theta$nvec>0)); } varphiovern <- theta$varphi[theta$nvec>0]/theta$nvec[theta$nvec>0]; evv <- eigen(t(XX)%*%XX, symmetric=TRUE, EISPACK=TRUE); if(dim(XX)[2] == 1){ Sigma <- 1/(theta$phi*t(XX)%*%XX + varphiovern); }else{ Sigma <- evv$vectors %*% diag(1/(theta$phi*evv$values + varphiovern)) %*% t(evv$vectors) } mu <- theta$phi*Sigma%*%(t(XX)%*%y); ll <- length(y)/2*log(theta$phi/2/pi) + sum(log(varphiovern))/2 - sum(log(varphiovern + theta$phi*evv$values))/2 - (theta$phi*sum((y-XX%*%mu)^2) + sum(varphiovern*mu^2))/2; return(ll); } ##lprior() # log prior calculation # * without beta! (DO NOT INCLUDE prior for beta here) # in order to match up with the likelihood (no beta) # - for binary model, z is not involved # - prior on lambda: exponential # - n, d, meanJ is pre-saved in list fixed # - beta is integrated out lprior <- function(theta, # list(p, nvec, varphi, beta, L, phi) fixed, # list(n, d, alpha, eps, gam, meanJ, palpha, la, lb) modeltype # 0/1, normal linear/binary probit ){ nvec <- theta$nvec; J <- sum(nvec); L <- theta$L; n <- fixed$n; p <- rep(1, n+1)/(n+1); varphi <- theta$varphi[nvec>0]; if(modeltype == 0){ phi <- theta$phi; }else{ phi <- 1; } alpha <- fixed$alpha; eps <- fixed$eps; meanJ <- fixed$meanJ; lpriorL <- sum(dgamma(L, fixed$la, fixed$lb, log=T)); lp <- dpoiss(x=J, lambda=meanJ, log=TRUE) + dmultinom(nvec, size=J, prob=p, log=TRUE) + sum(dgamma(varphi, alpha/2, alpha*eps^2/2, log=T)) + lpriorL - log(phi); return(lp); } ##updatebeta() # update step for regression coefficients # - conjugate normal update conditional on rest # - use the posterior mean in regression (Rao-Blackwellization) # - use the posterior sample in probit model (fully Bayes) updatebeta <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(p, nvec, varphi, beta, L, phi) fixed, # list(n, d, alpha, eps, gam, meanJ, palpha, la, lb) modeltype, fullXX=NULL # precalculated XX matrix ){ if(modeltype == 1){ y <- theta$z; theta$phi <- 1; } if(is.null(fullXX)){ XX <- getdesign(X, X, theta); }else{ XX <- matrix(fullXX[, theta$nvec>0], nc=sum(theta$nvec>0)); } varphiovern <- theta$varphi[theta$nvec>0]/theta$nvec[theta$nvec>0]; evv <- eigen(t(XX)%*%XX, symmetric=TRUE, EISPACK=TRUE); ivals <- 1/(theta$phi*evv$values + varphiovern); if(dim(XX)[2] == 1){ Sigma <- 1/(theta$phi*t(XX)%*%XX + varphiovern); }else{ Sigma <- evv$vectors %*% diag(ivals) %*% t(evv$vectors) } mu <- theta$phi*Sigma%*%(t(XX)%*%y); theta$beta <- rep(0, 1+fixed$n); if(modeltype == 0){ # posterior mean (Rao-Blackwellization) theta$beta[theta$nvec>0] <- mu; }else{ theta$beta[theta$nvec>0] <- mu + evv$vectors %*% rnorm(length(mu), 0, sd=sqrt(ivals)); } return(theta); } ##birth() # birth step in RJ-MCMC # - keep (L, phi/z) fixed, only update (J, ci, varphi) # - birth from prior, random insert into existing kernels # - ci* uniformly from intercept and training sample index # - varphi* from gamma prior of varphi # - death according to multinomial draw w.r.t. power on varphi birth <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(ci, L, varphi, phi/z) fixed, # list(n, d, alpha, eps, gam, sizeJ, meanJ, la, lb) tune, # list(dpow, upow, varphistep, lstep, rstep, phistep, updtoss) pbd, # list(pbJ, pdJ, pbJp1, pdJp1, pbJm1, pdJm1) modeltype, fullXX=NULL ){ nvec <- theta$nvec; J <- sum(nvec); n <- fixed$n; meanJ <- fixed$meanJ; alpha <- fixed$alpha; eps <- fixed$eps; accbirth <- 0; newtheta <- theta; exptoss <- rexp(1); newci <- sample(n+1, 1); newtheta$nvec[newci] <- nvec[newci] + 1; if(nvec[newci]==0){ newtheta$varphi[newci] <- rgamma(1, alpha/2, alpha*eps^2/2); } deathprobs <- newtheta$nvec*(newtheta$varphi^tune$dpow); deathprob <- deathprobs[newci]/sum(deathprobs); logacc <- llike(y, X, newtheta, modeltype, fullXX) - llike(y, X, theta, modeltype, fullXX) + log(meanJ/newtheta$nvec[newci]) + log(deathprob) + log(pbd$pdJp1/pbd$pbJ); if(exptoss > - logacc){ theta <- newtheta; accbirth <- 1; } return(list(theta=theta, accbirth=accbirth)); } ##death() # death step in RJ-MCMC # - death according to multinomial draw w.r.t. power on varphi # - birth from prior, random insert into existing kernels # - ci* uniformly from intercept and training sample index # - varphi* from gamma prior of varphi death <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(ci, L, varphi, phi/z) fixed, # list(n, d, alpha, eps, gam, sizeJ, meanJ, la, lb) tune, # list(dpow, upow, varphistep, lstep, rstep, phistep, updtoss) pbd, # list(pbJ, pdJ, pbJp1, pdJp1, pbJm1, pdJm1) modeltype, fullXX=NULL ){ nvec <- theta$nvec; J <- sum(nvec); n <- fixed$n; meanJ <- fixed$meanJ; accdeath <- 0; newtheta <- theta; exptoss <- rexp(1); deathprobs <- nvec*(theta$varphi^tune$dpow); deathci <- sample(1:(n+1), 1, replace=T, prob=deathprobs); deathprob <- deathprobs[deathci]/sum(deathprobs); newtheta$nvec[deathci] <- newtheta$nvec[deathci] - 1; logacc <- llike(y, X, newtheta, modeltype, fullXX) - llike(y, X, theta, modeltype, fullXX) + log(nvec[deathci]/meanJ) - log(deathprob) + log(pbd$pbJm1/pbd$pdJ); if(exptoss > - logacc){ theta <- newtheta; accdeath <- 1; } return(list(theta=theta, accdeath=accdeath)); } ##updateci() # update step for center index in theta # eqivalent to a death + birth step together # - keep the number of kernels fixed # - first select one existing location j, with nj>0 # - selection probability is power on varphi ($upow) for those nj>0 # - delete the selected kernel and add a new kernel at random location # - if location new, also propose new varphi updateci <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(ci, L, varphi, phi/z) fixed, # list(n, d, alpha, eps, gam, sizeJ, meanJ, la, lb) tune, # list(dpow, upow, varphistep, lstep, rstep, phistep, updtoss) modeltype, fullXX=NULL ){ nvec <- theta$nvec; J <- sum(nvec); n <- fixed$n; meanJ <- fixed$meanJ; alpha <- fixed$alpha; eps <- fixed$eps; accupdateci <- 0; newtheta <- theta; exptoss <- rexp(1); dpsfromold <- nvec*(theta$varphi^tune$dpow); dpsfromold <- dpsfromold/sum(dpsfromold); delind <- sample(1:(n+1), 1, replace=T, prob=dpsfromold); newind <- sample(n+1, 1); newtheta$nvec[delind] <- newtheta$nvec[delind] - 1; newtheta$nvec[newind] <- newtheta$nvec[newind] + 1; if(nvec[newind] == 0){ newtheta$varphi[newind] <- rgamma(1, alpha/2, alpha*eps^2/2); } dpsfromnew <- newtheta$nvec*(newtheta$varphi^tune$dpow); dpsfromnew <- dpsfromnew/sum(dpsfromnew); logacc <- llike(y, X, newtheta, modeltype, fullXX) - llike(y, X, theta, modeltype, fullXX) + log(nvec[delind]/(nvec[newind]+1)) + log(dpsfromnew[newind]/dpsfromold[delind]); if(exptoss > - logacc){ theta <- newtheta; accupdateci <- 1; } return(list(theta=theta, accupdateci=accupdateci)); } ##updatevarphi() # update step for unit normal precisions # - only update one varphi, where nj>0 # - selection probability is uniform # - once selected, update via normal random walk on log scale ($varphistep) updatevarphi <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(ci, L, varphi, phi/z) fixed, # list(n, d, alpha, eps, gam, sizeJ, meanJ, la, lb) tune, # list(dpow, upow, varphistep, lstep, rstep, phistep, updtoss) modeltype, fullXX=NULL ){ nvec <- theta$nvec; varphi <- theta$varphi; n <- fixed$n; alpha <- fixed$alpha; eps <- fixed$eps; accupdatevarphi <- 0; newtheta <- theta; exptoss <- rexp(1); probs <- rep(1, n+1); probs[nvec == 0] <- 0; updind <- sample(1:(n+1), 1, replace=T, prob=probs); newtheta$varphi[updind] <- rlognorm(1, log(theta$varphi[updind]), tune$varphistep); logacc <- llike(y, X, newtheta, modeltype, fullXX) - llike(y, X, theta, modeltype, fullXX) + dgamma(newtheta$varphi[updind], alpha/2, alpha*eps^2/2, log=T) - dgamma(varphi[updind], alpha/2, alpha*eps^2/2, log=T) - log(varphi[updind]) + log(newtheta$varphi[updind]); if(exptoss > - logacc){ theta <- newtheta; accupdatevarphi <- 1; } return(list(theta=theta, accupdatevarphi=accupdatevarphi)); } ##updateL() updateL <- function(y, X, theta, fixed, tune, modeltype ){ la <- fixed$la; lb <- fixed$lb; accupdateL <- 0; newtheta <- theta; exptoss <- rexp(1); updind <- sample(fixed$d, 1); newtheta$L[updind] <- rlognorm(1, log(theta$L[updind]), tune$lstep); logacc <- llike(y, X, newtheta, modeltype) - llike(y, X, theta, modeltype) + dgamma(newtheta$L[updind], la, lb, log=T) - dgamma(theta$L[updind], la, lb, log=T) - log(theta$L[updind]) + log(newtheta$L[updind]); if(exptoss > - logacc){ theta <- newtheta; accupdateL <- 1; } return(list(theta=theta, accupdateL=accupdateL)); } ##updatephi() # update the noise precision in the normal linear model # - because beta is itegrated out, no conjugate update # - update via normal random walk on log scale ($phistep, updtoss) # - non-informative prior 1/phi get canceled with the proposal ratio updatephi <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(p, nvec, varphi, beta, L, phi) fixed, tune, modeltype, # 0/1, normal linear/binary probit fullXX=NULL # precalculated XX matrix ){ if(modeltype != 0){ stop("ERROR: cannot update phi for non normal linear models.") } accupdatephi <- 0; newtheta <- theta; exptoss <- rexp(1); newtheta$phi <- rlognorm(1, log(theta$phi), tune$phistep); logacc <- llike(y, X, newtheta, modeltype, fullXX) - llike(y, X, theta, modeltype, fullXX); if(exptoss > - logacc){ theta <- newtheta; accupdatephi <- 1; } return(list(theta=theta, accupdatediag=accupdatephi)); } ##updatez() # update the augmented normal variable z in a gibbs move for probit model # - no need for tuning parameter, always accept # - recommend first update beta, then z. # - any number whose absolute value > 20 shrinks to (+-)20 updatez <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(p, nvec, varphi, beta, L, phi) modeltype, # 0/1, normal linear/binary probit fullXX=NULL # precalculated XX matrix ){ if(modeltype != 1){ stop("ERROR: cannot update z for non probit models.") } if(is.null(fullXX)){ XX <- getdesign(X, X, theta); }else{ XX <- matrix(fullXX[, theta$nvec>0], nc=sum(theta$nvec>0)); } z <- rep(NA, length(y)); u <- runif(length(y)); Xb <- XX %*% theta$beta[theta$nvec>0]; z[y==1] <- Xb[y==1] + qnorm(u[y==1] * pnorm(Xb[y==1]) + pnorm(-Xb[y==1])); z[y==0] <- Xb[y==0] + qnorm(u[y==0] * pnorm(-Xb[y==0])); z[z > 20] <- 20; z[z < -20] <- -20; theta$z <- z; return(theta); } ##update() # update some parameters within rjmcmc # - update ci, varphi respectively # - exclude L, because fullXX may need recalculated. # - exclude the updates for z/beta/phi (alwasy update them in rjmcmc) update <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(p, nvec, varphi, beta, L, phi) fixed, tune, modeltype, fullXX=NULL ){ toss <- runif(1, min=0, max=1); if(toss < .5){ cur <- updateci(y, X, theta, fixed, tune, modeltype, fullXX); }else{ cur <- updatevarphi(y, X, theta, fixed, tune, modeltype, fullXX); } return(cur); } ##rjmcmcone() # one iteration of reversible jump mcmc # - choose birth/death/update # - always update z/beta/phi # - sometimes update L, and recalculate fullXX (tune$frequL) rjmcmcone <- function(y, # response varaible continuous/[0/1] depend on modeltype X, # n*d covariate matrix theta, # list(p, nvec, varphi, beta, L, phi) fixed, tune, modeltype, # 0/1, normal linear/binary probit fullXX=NULL ){ if(is.null(fullXX)){ fullXX <- getfulldesign(X, X, theta); } J <- sum(theta$nvec); if(J==1){ pbd <- list(pbJ=.8, pdJ=.0, pbJp1=.4, pdJp1=.4, pbJm1=NA, pdJm1=NA); }else if(J==2){ pbd <- list(pbJ=.4, pdJ=.4, pbJp1=.4, pdJp1=.4, pbJm1=.8, pdJm1=.0); }else{ pbd <- list(pbJ=.4, pdJ=.4, pbJp1=.4, pdJp1=.4, pbJm1=.4, pdJm1=.4); } toss <- runif(1, min=0, max=1); if(toss 0) && (ncol(x.test) != ncol(x.train))) stop("input x.test must have the same number of columns as x.train") if ((modeltype != 0) && (modeltype != 1)) stop("modeltype is not 0 nor 1") fixed$n <- dim(x.train)[1]; fixed$d <- dim(x.train)[2]; fixed$meanJ <- getmeanJ(fixed$alpha, fixed$eps, fixed$gam); # centering and scaling x.train.mean <- apply(x.train, 2, mean); x.train.sd <- apply(x.train, 2, sd); for(i in 1:fixed$d){ x.train[,i] <- (x.train[,i]-x.train.mean[i])/x.train.sd[i]; if(dim(x.test)[1] != 0){ x.test[,i] <- (x.test[,i]-x.train.mean[i])/x.train.sd[i]; } } if(fixed$d == 1){ x.train <- matrix(x.train, nc=1); x.test <- matrix(x.test, nc=1); } if(modeltype == 0){ y.train.mean <- mean(y.train); y.train.sd <- sd(y.train); y.train <- (y.train - y.train.mean)/y.train.sd; } if(is.null(theta$nvec)){ totalJ <- fixed$meanJ; theta$nvec <- as.vector(rmultinom(1, totalJ, rep(1, fixed$n+1)/(fixed$n+1))); } if(is.null(theta$varphi)){ theta$varphi <- rgamma(fixed$n+1, fixed$alpha/2, fixed$alpha*fixed$eps^2/2); } if(is.null(theta$L)){ theta$L <- rep(1, fixed$d) } if(is.null(theta$phi)){ theta$phi <- 1; } if(modeltype == 1){ if(is.null(theta$beta)){ theta$beta <- rep(0, fixed$n+1); } if(is.null(theta$z)){ theta <- updatez(y.train, x.train, theta, modeltype); } } fullXX <- NULL; for(i in 1:(keepevery*nburn)){ cur <- rjmcmcone(y.train, x.train, theta, fixed, tune, modeltype, fullXX); theta <- cur$theta; fullXX <- cur$fullXX; if(i %% printevery==0){ print(paste("burning iteration ", i, ", J=", sum(theta$nvec), ", max(nj)=", max(theta$nvec), sep="")); } } if(keeptrain == TRUE){ yhat.train <- matrix(NA, nr=dim(x.train)[1], nkeep); } if(dim(x.test)[1] != 0){ yhat.test <- matrix(NA, nr=dim(x.test)[1], nkeep); } theta.l <- matrix(NA, nr=dim(x.train)[2], nkeep); colnames(theta.l) <- colnames(x.train); if(modeltype == 0){ theta.phi <- rep(NA, nkeep); }else{ theta.phi <- rep(1, nkeep); } theta.nvec <- matrix(NA, nc=fixed$n+1, nr=nkeep); theta.varphi <- matrix(NA, nc=fixed$n+1, nr=nkeep); theta.beta <- matrix(NA, nc=fixed$n+1, nr=nkeep); theta.lambda <- matrix(NA, nc=fixed$d, nr=nkeep); theta.phi <- matrix(NA, nc=1, nr=nkeep); for(i in 1:(keepevery*nkeep)){ cur <- rjmcmcone(y.train, x.train, theta, fixed, tune, modeltype, fullXX); theta <- cur$theta; fullXX <- cur$fullXX; if(i %% printevery==0){ print(paste("posterior mcmc iteration ", i, ", J=", sum(theta$nvec), ", max(nj)=", max(theta$nvec), sep="")); } if(i %% keepevery==0){ theta.nvec[i/keepevery, ] <- theta$nvec; theta.varphi[i/keepevery, ] <- theta$varphi; theta.lambda[i/keepevery, ] <- theta$L; theta.phi[i/keepevery, 1] <- theta$phi; if(modeltype == 0){ theta <- updatebeta(y.train, x.train, theta, fixed, modeltype, fullXX); } theta.beta[i/keepevery, ] <- theta$beta; if(keeptrain == TRUE){ yhat.train[, i/keepevery] <- getdesign(x.train, x.train, theta) %*% theta$beta[theta$nvec>0]; } if(dim(x.test)[1] != 0){ yhat.test[, i/keepevery] <- getdesign(x.test, x.train, theta) %*% theta$beta[theta$nvec>0]; } } } colnames(theta.nvec) <- paste("n", 0:fixed$n, sep=""); colnames(theta.varphi) <- paste("v", 0:fixed$n, sep=""); colnames(theta.beta) <- paste("b", 0:fixed$n, sep=""); colnames(theta.lambda) <- paste("l", 1:fixed$d, sep=""); colnames(theta.phi) <- "phi"; bkrreturn <- list(theta.nvec = theta.nvec, theta.varphi = theta.varphi, theta.beta = theta.beta, theta.lambda = theta.lambda, theta.phi = theta.phi); if(keeptrain == TRUE){ yhat.train.mean <- apply(yhat.train, 1, mean); if(modeltype == 0){ yhat.train.mean <- yhat.train.mean * y.train.sd + y.train.mean; } bkrreturn$yhat.train.mean <- yhat.train.mean; } if(dim(x.test)[1] != 0){ yhat.test.mean <- apply(yhat.test, 1, mean); if(modeltype == 0){ yhat.test.mean <- yhat.test.mean * y.train.sd + y.train.mean; } bkrreturn$yhat.test.mean <- yhat.test.mean; } return(bkrreturn); }