function [beta,v,prob,haty,e,ypred,vpred]=mregpredict(y,x,ipred) % fits a multiple linear regression model to response data y % using predictor matrix x % % Uses only a subset of the data for model fitting: index vector % ipred is the set of samples to be predicted, not used to fit % % % ouputs are the LSE beta, the SDs of elements of beta in V, % approximate probability level for each coeff, fitted values haty and residuals e % % ypred is the vector of predicted values, with SDs vpred % also plots the predictions, and superimposes known outcomes % % y is nx1 column vector, x is pxn matrix % plots betas and approximate 95% intervals [p,n]=size(x); itrain=1:n; itrain(ipred)=[]; ntrain=length(itrain); npred=length(ipred); X=x; X=X(:,itrain); [p,n]=size(X); Y=reshape(y(itrain),n,1); % model fit ... H=[ones(n,1),X']; % create design matrix with intercept and center genes V=inv(H'*H); beta=V*H'*Y; % least squares coefficients haty=H*beta; % fitted values e=Y-haty; % residuals s=sum(e'*e)/(n-length(beta)); % estimate of residual variance v=sqrt(s*diag(V)); % compute standard errors of beta coefficients df=n-length(beta); % next plot betas and approximate 90% intervals prob=2*(1-pt(abs(beta./v),df)); % prob level for each coeff qtc=qt(0.95,df); figure(2); clf plot([0 1+p],[0 0]); hold on for j=1:p plot([j,j],[beta(j+1)+qtc*v(j+1),beta(j+1)-qtc*v(j+1)],'k') text(j,beta(j+1),'+','HorizontalAlignment','Center') end; xlabel('Predictor gene index'); ylabel('Coeff') title('Regression coefficients') hold off; % now predictions ... figure(3); clf [p,n]=size(x); X=x(:,ipred); H=[ones(npred,1),X']; ypred=H*beta; vpred=sqrt(s*(diag(H*V*H')+ones(npred,1))); plot([min(ipred)-1,max(ipred)+1], [0 0]); hold on scatter(ipred,y(ipred)) for j=1:npred plot([ipred(j),ipred(j)],[ypred(j)+qtc*vpred(j),ypred(j)-qtc*vpred(j)],'k') text(ipred(j),ypred(j),'+','HorizontalAlignment','Center') end; xlabel('Prediction sample index'); ylabel('Response') title('Out of sample predictions') hold off; %