cpcr.m
function result=cpcr(x,y,varargin)
%CPCR performs a classical principal components regression.
% First, classical PCA is applied to the predictor variables x (see cpca.m) and
% k components are retained. Then the response variable y is regressed on the
% k principal components by multiple linear regression (see mlr.m).
%
% I/O: result=cpcr(x,y,'k',2);
%
% Required input arguments:
% x : Data matrix of the explanatory variables
% (n observations in rows, p variables in columns)
% y : Data matrix of the response variables
% (n observations in rows, q variables in columns)
%
% Optional input argument:
% k : Number of principal components to compute. If k is missing,
%     a scree plot is drawn from which the number of principal components
%     can be selected.
% plots : If equal to one (default), a menu is shown from which several plots
%     can be drawn, such as a score outlier map and a regression outlier map.
%     If 'plots' is equal to zero, all plots are suppressed.
%     See also makeplot.m
%
% The output is a structure containing
%
% result.slope : Classical slope
% result.int : Classical intercept
% result.fitted : Classical prediction vector
% result.res : Classical residuals
% result.cov : Estimated variance-covariance matrix of the residuals
% result.rsquared : R-squared value
% result.k : Number of components used in the regression
% result.sd : Classical score distances
% result.od : Classical orthogonal distances
% result.resd : Residual distances (when there are several response variables).
% If univariate regression is performed, it contains the standardized residuals.
% result.cutoff : Cutoff values for the score, orthogonal and residual distances.
% result.flag : The observations whose orthogonal distance is larger than result.cutoff.od
%     (orthogonal outliers => result.flag.od) and/or whose residual distance is
%     larger than result.cutoff.resd (bad leverage points/vertical outliers => result.flag.resd)
%     are considered outliers and receive a flag equal to zero (=> result.flag.all).
%     The regular observations, including the good leverage points, receive a flag equal to one.
% result.class : 'CPCR'
% result.cpca : Full output of the classical PCA part (see cpca.m)
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust.html
%
% Written by S. Verboven
% Last Update: 05/04/2003
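%
% Example (an illustrative sketch; the data below are synthetic placeholders,
% not part of LIBRA):
%    x=randn(50,10);                          % 50 observations, 10 predictors
%    y=x(:,1:3)*[1;-2;0.5]+0.1*randn(50,1);   % response built from 3 predictors
%    result=cpcr(x,y,'k',3,'plots',0);
%    yhat=x*result.slope+repmat(result.int,size(x,1),1); % fitted values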
default=struct('plots',1,'k',0);
options=default;
if nargin>2
    %Parse the optional name/value pairs: option names sit on the odd
    %positions of varargin, their values on the even positions that follow.
    %Names that do not match a default field are silently ignored.
    for j=1:2:nargin-2
        if isfield(default,varargin{j})
            options.(varargin{j})=varargin{j+1};
        end
    end
end
%Classical PCA on the predictors; if k was not supplied (k==0), cpca draws
%a scree plot so that the number of components can be chosen interactively.
if options.k==0
    out.cpca=cpca(x,'plots',0);
else
    out.cpca=cpca(x,'plots',0,'k',options.k);
end
%score matrix augmented with a column of ones to model the intercept
T=[out.cpca.T(:,1:out.cpca.k) ones(size(out.cpca.T,1),1)];
%Multivariate linear regression of y on the scores
out.a=T\y; %least-squares solution; numerically preferable to inv(T'*T)*T'*y
out.fitted=T*out.a;
len=size(out.a,1);
p=size(T,2);
q=size(y,2);
geg=[T,y]; %scores (including the intercept column) and responses combined
[n,m]=size(geg);
S=cov(geg);
Sx=S(1:p-1,1:p-1);
Sxy=S(1:p-1,p+1:m);
Syx=Sxy';
Sy=S(p+1:m,p+1:m);
Se=Sy-out.a(1:p-1,1:q)'*Sx*out.a(1:p-1,1:q); %variance of errors
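%Se is the response covariance minus the part of it explained by the
%regression on the scores, i.e. an estimate of the residual covariance.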
%regression coefficients slope and intercept ([\beta \alpha])
out.coeffs=[out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:); out.a(len,:)-out.cpca.M*out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:)]; %%coefficients in the original space;
out.slope=out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:);
out.int=out.a(len,:)-out.cpca.M*out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:);
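%The back-transformation above can be verified as follows (a sketch, assuming
%cpca centers x at its mean M with loadings P, as the use of out.cpca.M and
%out.cpca.P suggests): the scores equal (x-M)*P(:,1:k), so
%   yhat = (x-M)*P(:,1:k)*a(1:k,:) + ones(n,1)*a(k+1,:)
%        = x*slope + ones(n,1)*int
%with slope and int as computed above, i.e. the fitted model expressed in the
%original variable space.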
out.res=y-out.fitted;
out.sigma=Se;
if q==1
    out.stdres=out.res./sqrt(diag(out.sigma));
end
out.k=out.cpca.k;
STTm=sum((y-repmat(mean(y),size(y,1),1)).^2); %total sum of squares
SSE=sum(out.res.^2); %residual sum of squares
out.rsquared=1-SSE./STTm; %elementwise, one R-squared per response variable
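%For univariate y the quantities above are scalars; for multivariate y, SSE
%and STTm are 1-by-q row vectors, so out.rsquared holds one value per response.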
out.class='CPCR';
%calculating residual distances
if q>1
    if (-log(det(Se)/(m-1)))>50 %guard against a (near-)singular residual covariance
        out.resd='singularity';
    else
        cen=zeros(q,1);
        out.resd=sqrt(mahalanobis(out.res,cen','cov',Se))';
    end
else % q==1
    out.resd=out.stdres; %standardized residuals
end
out.cutoff=out.cpca.cutoff;
out.cutoff.resd=sqrt(chi2inv(0.975,size(y,2))); %sqrt of the 97.5% chi-square quantile with q degrees of freedom
%computing flags
out.flag.od=out.cpca.flag.od;
out.flag.sd=out.cpca.flag.sd;
out.flag.resd=abs(out.resd)<=out.cutoff.resd;
out.flag.all=(out.flag.od & out.flag.resd);
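%An observation is flagged as regular (flag.all equal to one) only if it is
%neither an orthogonal outlier nor a residual outlier; the score distances
%(flag.sd) are reported but do not enter flag.all.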
result=struct( 'slope',{out.slope}, 'int',{out.int},'fitted',{out.fitted},'res',{out.res},...
'cov',{out.sigma},'rsquared',{out.rsquared},...
'k',{out.k},'sd', {out.cpca.sd},'od',{out.cpca.od},'resd',{out.resd},...
'cutoff',{out.cutoff},'flag',{out.flag},'class',{out.class},...
'cpca',{out.cpca});
try
    if options.plots
        makeplot(result)
    end
catch %output must be returned even if the plotting menu is interrupted
    %(use delete(gcf) to dismiss a leftover menu)
end