function [beta,li,alli]=mbinregsvd(x,Z,ivalid,ngene,nfac,nit,cvi) %===================================================================== % % Fits estimates in the binary regression model with Bayesian stochastic regularisation % % INPUTS: % x is the expression data to be used -- genes are rows, arrays are columns % Z is the binary indicator of class membership % ivalid is a vector of array numbers of validation cases % ngene is the number of genes to be selected -- those most highly correlated with % outcome on the training arrays % nfac is the number of supergene factors to include, the rest are ignored % nit is the number of iterates of the mode finding algorithm % cvi is 1 if you want '1 at a time' cross-validation analyses too, 0 if not % % OUTPUTS: % beta is the estimated regression vector for genes % stbeta are the sds % li the top ngene genes selected in the training set analysis % alli the indices of all genes selected in the CV optimisations % % data & parameters [N,n]=size(x); nvalid=length(ivalid); itrain=1:n; itrain(ivalid)=[]; ntrain=length(itrain); k=2; % reduce to top most correlated ngene ... z=Z; z(ivalid)=[]; X=x; X(:,ivalid)=[]; ind=select_genes_cor(X,z,ngene); li=ind(1:ngene); alli=[]; 'running optimised training set analysis ' X=x(li,:)-repmat(mean(x(li,:),2),1,n); [A,D,F]=svd_mw(X); D=D'; [p,n]=size(F); j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:); [p,n]=size(F); ln=1/sqrt(n); F=([ln*ones(1,n);F]); D=[sqrt(n);D]; D2=D.*D; p=p+1; y=randn(n,1); data=reshape(Z,n,1); gamma=zeros(p,1); tau2=ones(p,1); vi=tau2./(1+tau2); my=F'*gamma; m=[]; for j=1:nit phi=pnorm(-my,0,1); if (nvalid>0), data(ivalid)=0.5*ones(nvalid,1)<1-phi(ivalid); end; phiden=exp(-my.*my/2)/sqrt(2*3.14159); y=my+(2*data-1).*phiden./(data-(2*data-1).*phi); gamma=vi.*(F*y); my=F'*gamma; tau2=(k+(gamma.^2))/(k+1); vi=tau2./(1+tau2); m=[m,gamma]; if (floor(j/1000)==j/1000) j end; end; gamma=gamma(2:p); fit=my; pfit=pnorm(fit,0,1); r=mean(pfit); r=r/(1-r); pfit=pfit./(pfit+(1-pfit)*r); F=F(2:p,:); D2=D2(2:p); vi=vi(2:p); p=p-1; beta=A*gamma; [g,h]=sort(abs(gamma)); h=flipud(h); figure(1); set(gca,'FontSize',12); if (nfac>1) newbcpairs(F(:,itrain),Z(itrain),h(1:2),itrain); % newbcpairs(F,Z,1:4,1:n); hold off; else i=1:ntrain; f=F(itrain); z=Z(itrain); i=i(z==0); scatter(f(i),z(i)); hold on; i=1:ntrain; i=i(z==1); scatter(f(i),z(i),'r'); hold off; end; figure(2); set(gca,'FontSize',12); subplot(2,1,1); plot(gamma,'--bs','MarkerSize',6); hold on; title('Expression weights: Metagenes'); xlabel('Metagene'); ylabel('gamma') plot([0 p],[0 0],'black--'); hold off subplot(2,1,2); plot(beta); hold on; plot([0 ngene],[0 0],'black--'); hold off title('Expression weights: Genes') xlabel('Gene'); ylabel('beta') hold off; figure(6); set(gca,'FontSize',12); mplot(m', 1:min(3,p)); title('Iterates of parameters'); figure(3); set(gca,'FontSize',12); plot([min(fit) max(fit)],[.5 .5],'black--'); hold on; axis([min(fit) max(fit) 0 1]); z=1:n; z(ivalid)=[]; ZZ=Z; ZZ(ivalid)=[]; z0=z(ZZ==0); z1=z(ZZ==1); for j=z1 % text(fit(j),pfit(j),int2str(j),'Color','r','HorizontalAlignment','Center') text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','r','HorizontalAlignment','Center') end; for j=z0 % text(fit(j),pfit(j),int2str(j),'Color','b','HorizontalAlignment','Center') text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','b','HorizontalAlignment','Center') end; title('Fitted classification probabilities and outcomes') xlabel('Metagene score'); ylabel('Probability'); hold off; if (nvalid>0) % figure(4); set(gca,'FontSize',12); plot([min(fit) max(fit)],[.5 .5],'black--'); hold on; axis([min(fit) max(fit) 0 1]); z=ivalid; ZZ=Z(ivalid); z0=z(ZZ==0); z1=z(ZZ==1); for j=z1 % text(fit(j),pfit(j),int2str(j),'Color','r','HorizontalAlignment','Center') text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','r','HorizontalAlignment','Center') end; for j=z0 % text(fit(j),pfit(j),int2str(j),'Color','b','HorizontalAlignment','Center') text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','b','HorizontalAlignment','Center') end; title('CV probabilities: Validation cases') xlabel('Metagene score'); ylabel('Probability'); hold off; % end; hold off; alli=[]; li=reshape(li,ngene,1); if (cvi==1) %%% training cases ... [N,n]=size(x); fitcv=fit; pfitcv=pfit; for ij=1:ntrain ix=itrain(ij); ['running analysis ',int2str(ix) ] % reduce to most correlated ngene ignoring the hold out and validation cases iv=sort(unique([ix ivalid])); z=Z; z(iv)=[]; X=x; X(:,iv)=[]; s=[]; ind=select_genes_cor(X,z,ngene); si=ind(1:ngene); alli=unique([alli si]); X=x(si,:)-repmat(mean(x(si,:),2),1,n); [A,D,F]=svd_mw(X); D=D'; [p,n]=size(F); j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:); [p,n]=size(F); ln=1/sqrt(n); F=([ln*ones(1,n);F]); D=[sqrt(n);D]; D2=D.*D; p=p+1; y=randn(n,1); data=reshape(Z,n,1); gamma=zeros(p,1); tau2=ones(p,1); vi=tau2./(1+tau2); my=F'*gamma; for j=1:nit phi=pnorm(-my,0,1); data(ix)= 0.5<1-phi(ix); phiden=exp(-my.*my/2)/sqrt(2*3.14159); y=my+(2*data-1).*phiden./(data-(2*data-1).*phi); gamma=vi.*(F*y); my=F'*gamma; tau2=(k+(gamma.^2))/(k+1); vi=tau2./(1+tau2); if (floor(j/1000)==j/1000) j end; end; fitcv(ix)=my(ix); pfitcv(ix,:)=pnorm(my(ix),0,1); %%%% r=mean(pfitcv); r=r/(1-r); pfitcv(ix,:)=pfitcv(ix,:)/(pfitcv(ix,:)+(1-pfitcv(ix,:))*r); end; %%% end training cases figure(5); set(gca,'FontSize',12); plot([min(fitcv) max(fitcv)],[.5 .5],'black--'); hold on; axis([min(fitcv) max(fitcv) 0 1]); z=itrain; ZZ=Z(itrain); z0=z(ZZ==0); z1=z(ZZ==1); for j=z1 text(fitcv(j),pnorm(fitcv(j),0,1),int2str(j),'Color','r','HorizontalAlignment','Center') end; for j=z0 text(fitcv(j),pnorm(fitcv(j),0,1),int2str(j),'Color','b','HorizontalAlignment','Center') end; title('CV probabilities and outcomes: Training sample') xlabel('Metagene score'); ylabel('Probability'); hold off; % end; alli=reshape(alli,length(alli),1);