## examples.r # Simulation and real data analysis example source("bkr.r"); source("comparison.r"); simf <- function(x, type) { if (type == 1) { 10*sin(pi*x[,1]*x[,2]) + 20*(x[,3]-.5)^2 + 10*x[,4] + 10*x[,5]; }else if (type == 2) { sqrt((x[,1]^2 + (x[,2]*x[,3] - 1/x[,2]/x[,4])^2)); }else if (type == 3) { atan((x[,2]*x[,3] - 1/x[,2]/x[,4])/x[,1]); } } simfe <- function(n, type, sd) { if (type == 1) { x <- matrix(runif(n*10, min=0, max=1), nc=10); y <- simf(x, 1); y <- y + rnorm(n, mean=0, sd=sd); } else if (type == 2) { x <- cbind(runif(n, min=0, max=100), runif(n, min=40*pi, max=560*pi), runif(n, min=0, max=1), runif(n, min=1, max=11)); y <- simf(x, 2); y <- y + rnorm(n, mean=0, sd=sd); } else if (type == 3) { x <- cbind(runif(n, min=0, max=100), runif(n, min=40*pi, max=560*pi), runif(n, min=0, max=1), runif(n, min=1, max=11)); y <- simf(x, 3); y <- y + rnorm(n, mean=0, sd=sd); } return(list(x=x, y=y)); } simcircle <- function(n, dim) { if (dim < 2) { stop ("number of variables must be >= 2."); } x <- matrix(runif(n*dim, min=-1, max=1), nr=n); r2 <- x[, 1]^2 + x[, 2]^2; y <- rep(0, n); y[r2 <= 2/pi] <- 1; return (list(x=x, y=y)); } getdata <- function(dataname) { if (dataname == "BostonHousing") { ###Boston Housing Data Set ## The original Boston housing data set without spatial coordinate # library(mlbench) # data(BostonHousing) # write.csv(BostonHousing, "datasets/BostonHousing.csv", row.names=FALSE) datatable <- read.csv("datasets/BostonHousing.csv"); dataset <- list(x=c(), y=c()); dataset$x <- matrix(as.numeric(as.matrix(datatable[, 1:13])), nc=13); colnames(dataset$x) <- names(datatable)[1:13]; dataset$y <- datatable[, 14]; } else if (dataname == "BodyFat") { ###Body Fat Data Set ## follow the instructions in package "mfp" ## correct item 42, height 29.5 -> 69.5 ## removed case and bronzek, use siri as response # library(mfp) # data(bodyfat) # bodyfat[42, 7] <- 69.5 # bodyfat <- bodyfat[, -c(1:2)] # write.csv(bodyfat, "datasets/BodyFat.csv", row.names=FALSE) datatable <- read.csv("datasets/BodyFat.csv"); dataset <- list(x=c(), y=c()); dataset$x <- as.matrix(datatable[, -1]); dataset$y <- datatable[, 1]; } else if (dataname == "Basketball") { ###Basketball Data Set ## http://lib.stat.cmu.edu/datasets/smoothmeth ## Search baskball.dat ## Obtain 96 * 6 data from the web, and store it in /datasets datatable <- read.csv("datasets/Basketball.csv"); dataset <- list(x=c(), y=c()); dataset$x <- as.matrix(datatable[, -c(1, 6)]); dataset$y <- datatable[, 1]; } else if (dataname == "Ionosphere") { ###Ionosphere Data Set ## The second variable is deleted, because it is all zeros. # library(mlbench) # data(Ionosphere) # write.csv(Ionosphere, "datasets/Ionosphere.csv", row.names=FALSE) datatable <- read.csv("datasets/Ionosphere.csv"); dataset <- list(x=c(), y=c()); dataset$x <- matrix(as.numeric(as.matrix(datatable[, 1:34])), nc=34); dataset$x <- dataset$x[, -2]; dataset$y <- as.numeric(datatable$Class) - 1; } else if (dataname == "Sonar") { ###Sonar Data Set # library(mlbench) # data(Sonar) # write.csv(Sonar, "datasets/Sonar.csv", row.names=FALSE) datatable <- read.csv("datasets/Sonar.csv"); dataset <- list(x=c(), y=c()); dataset$x <- matrix(as.numeric(as.matrix(datatable[, 1:60])), nc=60); dataset$y <- as.numeric(datatable[, 61]) - 1; } else if (dataname == "WDBC") { ###WDBC Wisconsin Diagnostic Breast Cancer ## http://archive.ics.uci.edu/ml/ ## machine-learning-databases/breast-cancer-wisconsin/ ## no variable names datatable <- read.csv("datasets/WDBC.csv", header=F); dataset <- list(x=c(), y=c()); dataset$x <- as.matrix(datatable[, 3:32], nc=30); dataset$y <- as.numeric(datatable[, 2]) - 1; } else { stop(paste(dataname, " cannot be found in datasets directory.", sep="")); } return(dataset); } savetheta <- function(bkrreturn, # bkr returned object (list) outname # output name ) { write.csv(bkrreturn$theta.nvec, paste("output/", outname, ".nvec.txt", sep=""), row.names=FALSE, append=FALSE); write.csv(bkrreturn$theta.beta, paste("output/", outname, ".beta.txt", sep=""), row.names=FALSE, append=FALSE); write.csv(bkrreturn$theta.varphi, paste("output/", outname, ".varphi.txt", sep=""), row.names=FALSE, append=FALSE); write.csv(bkrreturn$theta.l, paste("output/", outname, ".lambda.txt", sep=""), row.names=FALSE, append=FALSE); write.csv(bkrreturn$theta.phi, paste("output/", outname, ".phi.txt", sep=""), row.names=FALSE, append=FALSE); } runexample <- function(name, # name of the example nrep=1, # number of repeated expriments para=NULL, # additional parameter list save=FALSE, # whether to save posterior thetas outname=NULL # output file name ) { if (is.null(outname)){ outname <- name; } compname <- paste("output/", outname, ".comp.txt", sep=""); if (is.null(para$ntrain)) { para$ntrain <- 200; } if (is.null(para$ntest)) { para$ntest <- 1000; } if (length(grep("Friedman", name)) == 1) { type <- as.numeric(substring(name, 9, 9)); if (!(type %in% c(1, 2, 3))) { stop("wrong name in Friedman example, type not 1, 2, 3."); } sdvec <- c(1, 125, 0.1); for (iii in 1:nrep) { dataset <- simfe(para$ntrain, type=type, sd=sdvec[type]); testset <- simfe(para$ntest, type=type, sd=0); models <- simcomp(dataset = dataset, testset = testset, outname = compname, modeltype = 0); } } else if (length(grep("Circle", name)) == 1) { type <- as.numeric(substring(name, 7, 10)); if(is.na(type)){ stop("wrong name in Circle example, dimension not numeric.") } for (iii in 1:nrep) { dataset <- simcircle(para$ntrain, type); testset <- simcircle(para$ntest, type); models <- simcomp(dataset = dataset, testset = testset, outname = compname, modeltype = 1); } } else if (name %in% c("BostonHousing", "BodyFat", "Basketball")) { dataset <- getdata(name); for (iii in 1:nrep) { models <- cvcomp(dataset = dataset, nfold = 5, outname = compname, modeltype = 0); } } else if (name %in% c("Ionosphere", "Sonar", "WDBC")) { dataset <- getdata(name); for (iii in 1:nrep) { models <- cvcomp(dataset = dataset, nfold = 5, outname = compname, modeltype = 1); } } else { stop("Unkown example."); } if (save == TRUE){ savetheta(models$bkr, outname); } return(models); } runexample1 <- function(name, # name of the example nrep=1, # number of repeated expriments para=NULL, # additional parameter list save=FALSE, # whether to save posterior thetas outname=NULL # output file name ) { if (is.null(outname)){ outname <- name; } compname <- paste("output/", outname, ".comp.txt", sep=""); if (is.null(para$ntrain)) { para$ntrain <- 200; } if (is.null(para$ntest)) { para$ntest <- 1000; } if (length(grep("Friedman", name)) == 1) { type <- as.numeric(substring(name, 9, 9)); if (!(type %in% c(1, 2, 3))) { stop("wrong name in Friedman example, type not 1, 2, 3."); } sdvec <- c(1, 125, 0.1); dataset <- simfe(para$ntrain, type=type, sd=sdvec[type]); testset <- simfe(para$ntest, type=type, sd=0); for (iii in 1:nrep) { models <- simcomp(dataset = dataset, testset = testset, outname = compname, modeltype = 0); } } else if (length(grep("Circle", name)) == 1) { type <- as.numeric(substring(name, 7, 10)); if(is.na(type)){ stop("wrong name in Circle example, dimension not numeric.") } dataset <- simcircle(para$ntrain, type); testset <- simcircle(para$ntest, type); for (iii in 1:nrep) { models <- simcomp(dataset = dataset, testset = testset, outname = compname, modeltype = 1); } } else if (name %in% c("BostonHousing", "BodyFat", "Basketball")) { dataset <- getdata(name); for (iii in 1:nrep) { models <- cvcomp1(dataset = dataset, nfold = 5, outname = compname, modeltype = 0); } } else if (name %in% c("Ionosphere", "Sonar", "WDBC")) { dataset <- getdata(name); for (iii in 1:nrep) { models <- cvcomp1(dataset = dataset, nfold = 5, outname = compname, modeltype = 1); } } else { stop("Unkown example."); } if (save == TRUE){ savetheta(models$bkr, outname); } return(models); }