function result=RoCPLS(X,y,k,plot) %RoCPLS is a robust method for Partial Least Squares classification based on the % SIMPLS algorithm. It can be applied to both low and high-dimensional data % matrix, X, and to one or multiple class problems with labels stored in "y". % % This program gives first two phases of RoCPLS: For given X, y and k; first, a weighted % data matrix, Xw, is calculated using PCOUT algorithm; secondly SIMPLS is applied % on the weighted data matrix to derive robust score matrix, T and robust % PLS weight matrix, R. % % %Input: %X: data matrix %y: class labels (smallest class label should be 0!!!), %k: number of components %plot: 1: construct i)PLS2 versus PLS1 plot and ii)orthogonal-score distance plot ; 0: no plot %Output: % result.T: Robust PLS component matrix % result.R: Robust PLS weight matrix % result.P: Robust PLS loading matrix % result.yc: ordered (from smallest to the largest) class labels % result.Xc: result.Xc=[X1'X2'...Xd']'; i.e. ordered data matrix wrt class % labels. % % !!!Note: Use T and result.yc for classification purpose %Created by: Nedret Billor & Asuman Turkmen %Updated on 04/01/2009 [n p]=size(X); Q=tabulate(y); d=size(Q,1);%number of classes Xn=[];Yn=[];yc=[];w=[]; %Phase 1: Determining the PCOUT based weighted data matrix %Weight vectors are calculated for each class: for i=1:d Xi=X(y==Q(i,1),:); Xn=[Xn;Xi]; [ni p]=size(Xi); if d>2; Yn(:,i)=(y==Q(i,1)); end; yc=[yc;repmat(Q(i,1),Q(i,2),1)]; if ni<=p; out=pcout(Xi,ni-1); else out=pcout(Xi,p); end; w=[w;out.w]; end; %Weighted data matrix is calculated: Xw=diag(w)*Xn; result.yc=yc; %ordered class labels result.Xc=Xn; %Xc=[X1'X2'...Xd']'; i.e. ordered data matrix wrt class labels %Phase 2: Determining the PCOUT based weighted data matrix if d==2 y=yc-repmat(mean(yc),n,1); [R,P,T]=simpls(Xw,y,k); else y=Yn-repmat(mean(Yn),n,1); [R,P,T]=simpls_2(Xw,y,k); end; result.T=T; result.R=R; result.P=P; %Score plots and orthogonal-score distance plots are created: if plot==1; Tori=diag(1./w)*result.T; %T=WXR ==>Tori=inv(W)T=XR T=medscale(Tori,1); f=2; result.sd=sqrt(diag(T.sc*T.sc.')); cutoff1=median(result.sd)+f*mad1(result.sd); result.od=sqrt(diag((Xn-Tori*result.P')*(Xn-Tori*result.P').')); cutoff2=median(result.od)+f*mad1(result.od); if k==1; fprintf('For score plot k should be at least 2!') scoreplot(result.sd,result.od,yc,cutoff1,cutoff2); title ('Robust Orthogonal-Score Distance Plot'); xlabel(['Score distance']); ylabel('Orthogonal distance'); else subplot(2,1,1); for i=1:d; hold('all'); scatter(Tori(yc==Q(i,1),2),Tori(yc==Q(i,1),1),'o'); end; title ('Robust Score Plot'); xlabel(['PLS1']); ylabel('PLS2'); subplot(2,1,2); scoreplot(result.sd,result.od,yc,cutoff1,cutoff2); title ('Robust Orthogonal-Score Distance Plot'); xlabel(['Score distance']); ylabel('Orthogonal distance'); end; end; %other required functions: %------------------------ function scoreplot(x,y,yc,cutoffx,cutoffy) Q=tabulate(yc); q=size(Q,1); for i=1:q; hold('all'); scatter(x(yc==Q(i,1),1),y(yc==Q(i,1),1),'o'); end; xmax=max([x ; cutoffx]); ymax=max([y ; cutoffy]); line([cutoffx cutoffx],[0 ymax] ,'Linestyle','-','Color','r'); line('Xdata',[0 xmax] ,'Ydata',[cutoffy cutoffy], 'Linestyle','-','Color','r'); %------------------------ function out=medscale(S,scale); % Computes the robust standadized matrix % Center: using coordinate wise medians % Scales: by MAD %input: %S: Data matrix %scale: %1: median %2: l1-median %Output: %out.sc=Centered and scaled matrix (columnwise) %out.med=median(S) %Robust center %out.mad=MAD(S) %Robust scatter %out.invmad %inverse of diag(scatter) [n p]=size(S); if scale==1; mx=median(S); elseif scale==2 mx=l1median(S,1e-4); end; Sdiff=S-repmat(mx,n,1); diff=abs(Sdiff); mad=1.4826*median(diff); mad=mad(:); w=[]; for i=1:p if (mad(i)~=0) a=1/mad(i); else a=1; end; w=[w;a]; end; result=Sdiff*diag(w);%Sphered data matrix out.sc=result; out.med=mx; out.mad=mad; out.invmad=diag(w);%gives inverse of diag(mad) %--------------------- function out=mad1(S) %Computes the median absolute deviance for data set S n=size(S,1); mx=median(S); Sdiff=S-repmat(mx,n,1); diff=abs(Sdiff); %out=1.4826*median(diff); out=median(diff); %--------------------- function [R,P,T]=simpls_2(x,y,A) %Multivariate SIMPLS! [n,pi]=size(x); qi=size(y,2); sigmaxy=x'*y; for i=1:A sigmayx=sigmaxy'; if qi>pi [RR,LL]=eig(sigmaxy*sigmayx); [LL,I]=greatsort(diag(LL)); rr=RR(:,I(1)); qq=sigmayx*rr; qq=qq/norm(qq); else if qi==1 qq = 1; rr = sigmaxy; else [QQ,LL]=eig(sigmayx*sigmaxy); [LL,I]=greatsort(diag(LL)); qq=QQ(:,I(1)); rr=sigmaxy*qq; end end tt=x*rr; nttc=norm(tt); rr=rr/nttc; tt=tt/nttc; qq=y'*tt; uu=y*qq; pp=x'*tt; vv=pp; if i>1 vv = vv -v*(v'*pp); end vv = vv/norm(vv); sigmaxy = sigmaxy - vv*(vv'*sigmaxy); v(:,i)=vv; q(:,i)=qq; t(:,i)=tt; u(:,i)=uu; p(:,i)=pp; r(:,i)=rr; end T=t;P=p;R=r;