# Read in pollution data x <- read.table("pollution") dim(x) wind <- x[,2] temp <- x[,3] humi <- x[,4] inso <- x[,5] oxid <- x[,6] par(mfrow=c(2,2),bty='n') plot( wind, oxid ) plot( temp, oxid ) plot( humi, oxid ) plot( inso, oxid ) cor( wind, oxid ) cor( temp, oxid ) cor( humi, oxid ) cor( inso, oxid ) options(contrasts=c("contr.treatment")) ########################################################################### # explore regressions on each individual meteorological variable # -- look at R^2 measure of "fit": Highest correlation <-> highest R^2 summary( lm( oxid~wind) )$r.squared summary( lm( oxid~temp) )$r.squared summary( lm( oxid~humi) )$r.squared summary( lm( oxid~inso) )$r.squared ########################################################################### # explore regression on temperature fit <- lm( oxid~ temp ) print(fit) summary(fit) # plot fitted least square line plot( temp, oxid ) lines( temp,fit$fitted.values,col=2) # messy as temp is not ordered: redo: # i <- order(temp); lines( temp[i],fit$fitted.values[i],col=2) ########################################################################### # now predictions .. include exact central probability intervals at each x qt( 0.975, 30-2) newtemp <- 65:95 # range of TEMP values to predict OXID at pre <- predict.lm(fit, data.frame(temp=newtemp) , se.fit=T) plot( temp, oxid , xlim=c(65,92), ylim=c(0,32)) ss <- 3.997^2 haty <- pre$fit vy <- sqrt(ss+pre$se.fit^2) # mean and S.E. of predictive distns lines(newtemp,haty,col=2) lines(newtemp,haty+2.048*vy,col=3) lines(newtemp,haty-2.048*vy,col=3) ########################################################################### # Now, compare predictive distributions at 3 values of TEMP, and # also with predictive based on OXID as a normal random sample # Set up range for plots ... y <- seq(0,32,length=500) # Plot data and predictive line for reference hist(oxid,nclass=15,prob=T,xlim=c(0,32),ylim=c(0,0.2), ylab="predictive",xlab="oxid") # Then, first, the linear regression prediction at TEMP=65 degrees j <- 1 temp[j] m <- haty[j]; m v <- vy[j]; v lines(y,dt((y-m)/v,28)/v) # Second, at TEMP=79 j <- 15 temp[j] m <- haty[j]; m v <- vy[j]; v lines(y,dt((y-m)/v,28)/v, col=2) # And third at TEMP=94 j <- 30 temp[j] m <- haty[j]; m v <- vy[j]; v lines(y,dt((y-m)/v,28)/v, col=3) # Finally, compare the predictive from the normal random sample model: m <- mean(oxid) v <- sqrt(var(oxid)*(1+1/n)) # scale factor of predictive t distn in normal sample model lines(y,dt((y-m)/v,29)/v, col=1, lty=2) ########################################################################### # Exceedance levels: What's the chance of OXID>17.5 as a function of TEMP? plot(newtemp,1-pt((17.5-haty)/vy,28),type="l",ylab="exceedance prob") # .. and of OXID>19.5? lines(newtemp,1-pt((19.5-haty)/vy,28),col=2) # .. and of OXID>21? lines(newtemp,1-pt((21-haty)/vy,28),col=3) ########################################################################### # plot vs two variables -- spinning version spin( matrix(c(oxid,temp,wind),30,3), collab=c("oxid","temp","wind"),highlight=rep(T,30))