# comparison.r # compare bkr result with BART (BayesTree),and SVM (kernlab) # BayesTree is installed for both 32 bit and 64 bit machine at system level library("BayesTree"); # kernlab is only installed under Zhi's local directories if(length(grep("x86_64", sessionInfo()$R.ver$system))==1){ library("kernlab", lib="/home/grad/zo2/.Rlibs64"); }else{ library("kernlab", lib="/home/grad/zo2/.Rlibs"); } ##cvcomp() # n-fold cross-validation comparison cvcomp <- function(dataset, # list(y[n vector], x[nxd matrix]) nfold, # fold of the cross validation outname, # output file name modeltype, # [0/1]: regression/classification keepevery = 500, nburn = 4000, nkeep = 4000, printevery = 50000, fixed = list(alpha = 1, eps = 0.5, gam = 10, la = 0.2, lb = 1), tune = list(dpow = 1, upow = 0, varphistep = 0.5, lstep = 0.3, phistep = 1, frequL = 0.2), ri = c() # random index of cross validation ){ tn <- length(dataset$y); if (dim(dataset$x)[1] != tn) { stop("The number of rows of x is different from length of y in the dataset."); } if (length(ri) == 0) { ri <- sample(tn); } ris <- list(); numin1fold <- floor(tn / nfold); for (i in 1:nfold) { if (i < nfold) { ris[[i]] <- ri[c(1:numin1fold) + numin1fold * (i-1)]; } else { ris[[i]] <- ri[c((1 + numin1fold * (i-1)):tn)]; } } results <- matrix(0, nr = nfold, nc = 3); for(i in 1:nfold){ x.train <- matrix(dataset$x[-ris[[i]], ], nc=dim(dataset$x)[2]); x.test <- matrix(dataset$x[ris[[i]], ], nc=dim(dataset$x)[2]); y.train <- dataset$y[-ris[[i]]]; y.test <- dataset$y[ris[[i]]]; print(paste(outname, " CV fold [", i, "], BKR 0", sep="")); bkr.fit <- bkr(x.train, y.train, x.test, modeltype = modeltype, keepevery = keepevery, nburn = nburn, nkeep = nkeep, printevery = printevery, fixed = fixed, tune = tune ); if (modeltype == 0) { bart.fit <- bart(x.train, y.train, x.test); svm.fit <- ksvm(y.train ~ x.train); results[i, ] <- c(mean((bkr.fit$yhat.test.mean-y.test)^2), mean((apply(bart.fit$yhat.test, 2, mean)-y.test)^2), mean((predict(svm.fit, x.test)-y.test)^2) ); } else { bart.fit <- bart(x.train, as.factor(y.train), x.test); svm.fit <- ksvm(as.factor(y.train) ~ x.train); results[i, ] <- c(mean((bkr.fit$yhat.test.mean>0)!=y.test), mean((apply(bart.fit$yhat.test, 2, mean)>0)!=y.test), mean(predict(svm.fit, x.test)!=y.test) ); } #print(paste("result for CV fold [", i, "]: ", results[i, ], sep="")); } ave.result <- apply(results, 2, mean); if(modeltype == 0){ ave.result <- ave.result/min(ave.result); } write(ave.result, file=outname, ncolumn=3, append=T); return(list(bkr=bkr.fit, bart=bart.fit, svm=svm.fit)); } ##simcomp() # simulation study comparison simcomp <- function(dataset, # list(y[n vector], x[nxd matrix]) testset, # list(y[m vector, true val], x[mxd matrix]) outname, # output file name modeltype, # [0/1]: regression/classification keepevery = 500, nburn = 200, nkeep = 2000, printevery = 5000, fixed = list(alpha = 1, eps = 0.5, gam = 10, la = 0.2, lb = 1), tune = list(dpow = 1, upow = 0, varphistep = 0.5, lstep = 0.3, phistep = 1, frequL = 0.2) ){ x.train <- dataset$x; y.train <- dataset$y; x.test <- testset$x; y.test <- testset$y; print(paste(outname, " SIM study, BKR 0", sep="")); bkr.fit <- bkr(x.train, y.train, x.test, modeltype = modeltype, keepevery = keepevery, nburn = nburn, nkeep = nkeep, printevery = printevery, fixed = fixed, tune = tune ); if (modeltype == 0) { bart.fit <- bart(x.train, y.train, x.test); svm.fit <- ksvm(y.train ~ x.train); results <- c(mean((bkr.fit$yhat.test.mean-y.test)^2), mean((apply(bart.fit$yhat.test, 2, mean)-y.test)^2), mean((predict(svm.fit, x.test)-y.test)^2) ); } else { bart.fit <- bart(x.train, as.factor(y.train), x.test); svm.fit <- ksvm(as.factor(y.train) ~ x.train); results <- c(mean((bkr.fit$yhat.test.mean>0)!=y.test), mean((apply(bart.fit$yhat.test, 2, mean)>0)!=y.test), mean(predict(svm.fit, x.test)!=y.test) ); } if(modeltype == 0){ ave.result <- ave.result/min(ave.result); } write(results, file=outname, ncolumn=3, append=T); return(list(bkr=bkr.fit, bart=bart.fit, svm=svm.fit)); } cvcomp1 <- function(dataset, # list(y[n vector], x[nxd matrix]) nfold, # fold of the cross validation outname, # output file name modeltype, # [0/1]: regression/classification keepevery = 500, nburn = 4000, nkeep = 4000, printevery = 10000, fixed = list(alpha = 1, eps = 0.5, gam = 20, la = 0.2, lb = 1), tune = list(dpow = 1, upow = 0, varphistep = 0.5, lstep = 0.3, phistep = 1, frequL = 0.1), ri = c() # random index of cross validation ){ tn <- length(dataset$y); if (dim(dataset$x)[1] != tn) { stop("The number of rows of x is different from length of y in the dataset."); } if (length(ri) == 0) { ri <- sample(tn); } ris <- list(); numin1fold <- floor(tn / nfold); for (i in 1:nfold) { if (i < nfold) { ris[[i]] <- ri[c(1:numin1fold) + numin1fold * (i-1)]; } else { ris[[i]] <- ri[c((1 + numin1fold * (i-1)):tn)]; } } results <- matrix(0, nr = nfold, nc = 3); for(i in 1:1){ x.train <- matrix(dataset$x[-ris[[i]], ], nc=dim(dataset$x)[2]); x.test <- matrix(dataset$x[ris[[i]], ], nc=dim(dataset$x)[2]); y.train <- dataset$y[-ris[[i]]]; y.test <- dataset$y[ris[[i]]]; print(paste(outname, " CV fold [", i, "], BKR 0", sep="")); bkr.fit <- bkr(x.train, y.train, x.test, modeltype = modeltype, keepevery = keepevery, nburn = nburn, nkeep = nkeep, printevery = printevery, fixed = fixed, tune = tune ); if (modeltype == 0) { bart.fit <- bart(x.train, y.train, x.test); svm.fit <- ksvm(y.train ~ x.train); results[i, ] <- c(mean((bkr.fit$yhat.test.mean-y.test)^2), mean((apply(bart.fit$yhat.test, 2, mean)-y.test)^2), mean((predict(svm.fit, x.test)-y.test)^2) ); } else { bart.fit <- bart(x.train, as.factor(y.train), x.test); svm.fit <- ksvm(as.factor(y.train) ~ x.train); results[i, ] <- c(mean((bkr.fit$yhat.test.mean>0)!=y.test), mean((apply(bart.fit$yhat.test, 2, mean)>0)!=y.test), mean(predict(svm.fit, x.test)!=y.test) ); } print(paste("result for CV fold [", i, "]: ", results[i, ], sep="")); } ave.result <- apply(results, 2, mean); if(modeltype == 0){ ave.result <- ave.result/min(ave.result); } write(ave.result, file=outname, ncolumn=3, append=T); return(list(bkr=bkr.fit, bart=bart.fit, svm=svm.fit)); }