function [beta,li,alli]=Mbinregsvd(x,Z,ivalid,ngene,nfac,nmc,cvi) %===================================================================== % % Fits full posterior in the binary regression model with Bayesian stochastic regularisation % using iterative similation MCMC methods % % % INPUTS: % x is the expression data to be used -- genes are rows, arrays are columns % Z is the binary indicator of class membership % ivalid is a vector of array numbers of validation cases % ngene is the number of genes to be selected -- those most highly correlated with % outcome on the training arrays % nfac is the number of supergene factors to include, the rest are ignored % nmc is a 3 vector: % nit is the number of iterates to summarise the MCMC % nbi is the initial number to burn-in and discard % nskip is the number of skips between saves % cvi is 1 if you want '1 at a time' cross-validation analyses too, 0 if not % % OUTPUTS: % beta is the estimated regression vector for genes % li the top ngene genes selected in the training set analysis % alli the indices of all genes selected in the CV optimisations % % data & parameters [N,n]=size(x); nvalid=length(ivalid); itrain=1:n; itrain(ivalid)=[]; ntrain=length(itrain); k=2; nit=nmc(1); nbi=nmc(2); nskip=nmc(3); % reduce to top most correlated ngene ... z=Z; z(ivalid)=[]; X=x; X(:,ivalid)=[]; ind=select_genes_cor(X,z,ngene); li=ind(1:ngene); alli=[]; 'running optimised training set analysis ' X=x(li,:)-repmat(mean(x(li,:),2),1,n); [A,D,F]=svd_mw(X); D=D'; [p,n]=size(F); j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:); [p,n]=size(F); ln=1/sqrt(n); F=([ln*ones(1,n);F]); D=[sqrt(n);D]; D2=D.*D; p=p+1; y=randn(n,1); gamma=zeros(p,1); tau2=ones(p,1); vi=tau2./(1+tau2); my=F'*gamma; m=[]; fit=zeros(n,1); pfit=[]; data=reshape(Z,n,1); Fit=zeros(n,1); Pfit=zeros(n,nit); for j=1:(nbi+nskip*nit) phi=pnorm(-my,0,1); pr=1-phi; r=mean(pr); r=r/(1-r); pr=pr./(pr+(1-pr)*r); if (nvalid>0), data(ivalid)=rand(nvalid,1)nbi & floor((j-nbi)/nskip)==(j-nbi)/nskip) fit=fit+my; pfit=[pfit, pr]; m=[m,gamma]; end; if (floor(j/1000)==j/1000) j end; end; Fit=fit/nit; Pfit=pfit; gamma=mean(m,2); gamma=gamma(2:p); beta=A*gamma; F=F(2:p,:); p=p-1; %fit=Fit; pfit=mean(Pfit,2); sl=prctile(Pfit',5)'; su=prctile(Pfit',95)'; fit=Fit; pfit=mean(Pfit,2); sl=prctile(Pfit',17)'; su=prctile(Pfit',83)'; [g,h]=sort(abs(gamma)); h=flipud(h); figure(1); set(gca,'FontSize',12); if (nfac>1) newbcpairs(F(:,itrain),Z(itrain),h(1:2),itrain); hold off; else i=1:ntrain; f=F(itrain); z=Z(itrain); i=i(z==0); scatter(f(i),z(i)); hold on; i=1:ntrain; i=i(z==1); scatter(f(i),z(i),'r'); hold off; end; figure(2); set(gca,'FontSize',14); subplot(2,1,1); plot(gamma,'--bs','MarkerSize',6,'LineWidth',1.0); hold on; title('Expression weights: Metagenes'); xlabel('Metagene'); ylabel('gamma') plot([0 p],[0 0],'black--','LineWidth',1); hold off subplot(2,1,2); plot(beta,'LineWidth',1.0); hold on; plot([0 ngene],[0 0],'black--','LineWidth',1.0); hold off title('Expression weights: Genes') xlabel('Gene'); ylabel('beta') hold off; figure(3); set(gca,'FontSize',14); plot([min(fit) max(fit)],[.5 .5],'black--','LineWidth',1); hold on; axis([min(fit) max(fit) 0 1]) z=1:n; z(ivalid)=[]; ZZ=Z; ZZ(ivalid)=[]; z0=z(ZZ==0); z1=z(ZZ==1); for j=z1 text(fit(j),pfit(j),int2str(j),'Color','r','HorizontalAlignment','Center','FontSize',14) % text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','r','HorizontalAlignment','Center') plot([fit(j),fit(j)],[max(0,sl(j)),min(1,su(j))],'r:','LineWidth',2.0) end; for j=z0 text(fit(j),pfit(j),int2str(j),'Color','b','HorizontalAlignment','Center','FontSize',14) % text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','b','HorizontalAlignment','Center') plot([fit(j),fit(j)],[max(0,sl(j)),min(1,su(j))],'b:','LineWidth',2.0) end; %title('Fitted classification probabilities and outcomes') xlabel('Metagene score'); ylabel('Probability'); hold off; if (nvalid>0) % figure(4); set(gca,'FontSize',14); plot([min(fit) max(fit)],[.5 .5],'black--','LineWidth',1); axis([min(fit) max(fit) 0 1]) hold on; z=ivalid; ZZ=Z(ivalid); z0=z(ZZ==0); z1=z(ZZ==1); for j=z1 % text(fit(j),pfit(j),int2str(j),'Color','r','HorizontalAlignment','Center','FontSize',14) text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','r','HorizontalAlignment','Center') plot([fit(j),fit(j)],[max(0,sl(j)),min(1,su(j))],'r:','LineWidth',2.0) end; for j=z0 % text(fit(j),pfit(j),int2str(j),'Color','b','HorizontalAlignment','Center','FontSize',14) text(fit(j),pnorm(fit(j),0,1),int2str(j),'Color','b','HorizontalAlignment','Center') plot([fit(j),fit(j)],[max(0,sl(j)),min(1,su(j))],'b:','LineWidth',2.0) end; %title('CV probabilities: Validation cases') xlabel('Metagene score'); ylabel('Probability'); hold off; % end; hold off; li=reshape(li,ngene,1); if (cvi==1) %%% training cases ... [N,n]=size(x); Fitcv=zeros(n,1); Pfitcv=zeros(n,nit); for ij=1:ntrain ix=itrain(ij); ['running analysis ',int2str(ix) ] % reduce to most correlated ngene ignoring the hold out and validation cases iv=sort(unique([ix ivalid])); z=Z; z(iv)=[]; X=x; X(:,iv)=[]; s=[]; ind=select_genes_cor(X,z,ngene); si=ind(1:ngene); alli=unique([alli si]); X=x(si,:)-repmat(mean(x(si,:),2),1,n); [A,D,F]=svd_mw(X); D=D'; [p,n]=size(F); j=1:min(min(ngene,nfac),p); A=A(:,j); D=D(j); F=F(j,:); [p,n]=size(F); F=([ln*ones(1,n);F]); D=[sqrt(n);D]; D2=D.*D; p=p+1; y=randn(n,1); gamma=mean(m,2); tau2=ones(p,1); vi=tau2./(1+tau2); data=reshape(Z,n,1); my=F'*gamma; fit=0; pfit=[]; for j=1:(nbi+nskip*nit) phi=pnorm(-my,0,1); pr=1-phi; %%% r=mean(pr); r=r/(1-r); pr=pr./(pr+(1-pr)*r); data(iv)=rand(nvalid+1,1)nbi & floor((j-nbi)/nskip)==(j-nbi)/nskip) fit=fit+my(ix); pfit=[pfit, pr(ix)]; end; if (floor(j/1000)==j/1000) j end; end; Fitcv(ix)=fit/nit; Pfitcv(ix,:)=pfit; end; % fitcv=Fitcv; pfitcv=mean(Pfitcv,2); slcv=prctile(Pfitcv',5)'; sucv=prctile(Pfitcv',95)'; fitcv=Fitcv; pfitcv=mean(Pfitcv,2); slcv=prctile(Pfitcv',17)'; sucv=prctile(Pfitcv',83)'; figure(5); set(gca,'FontSize',14); plot([min(fitcv) max(fitcv)],[.5 .5],'black--','LineWidth',1); axis([min(fitcv) max(fitcv) 0 1]) hold on; z=itrain; ZZ=Z(itrain); z0=z(ZZ==0); z1=z(ZZ==1); for j=z1 % text(fitcv(j),pfitcv(j),int2str(j),'Color','r','HorizontalAlignment','Center','FontSize',14) text(fitcv(j),pnorm(fitcv(j),0,1),int2str(j),'Color','r','HorizontalAlignment','Center','FontSize',14) plot([fitcv(j),fitcv(j)],[max(0,slcv(j)),min(1,sucv(j))],'r:','LineWidth',2.0) end; for j=z0 % text(fitcv(j),pfitcv(j),int2str(j),'Color','b','HorizontalAlignment','Center','FontSize',14)' text(fitcv(j),pnorm(fitcv(j),0,1),int2str(j),'Color','b','HorizontalAlignment','Center','FontSize',14) plot([fitcv(j),fitcv(j)],[max(0,slcv(j)),min(1,sucv(j))],'b:','LineWidth',2.0) end; %title('CV probabilities and outcomes: Training sample') xlabel('Metagene score'); ylabel('Probability'); hold off; % end; alli=reshape(alli,length(alli),1);