function [Q, I, B, BB] = lsselect(y,x,crit,how,pmax,level);
%LSSELECT Select a predictor subset for regression
%
%     	  [Q, I, B, BB] = lsselect(y,x,crit,how,pmax,level)
%
%	  Selects a good subset of regressors in a multiple linear 
%	  regression model. The criterion is one of the following 
%	  ones, determined by the third argument, crit. 
%
%	    'HT'   Hypothesis Test (default level = 0.05)
%	    'AIC'  Akaike's Information Criterion 
%	    'BIC'  Bayesian Information Criterion
%	    'CMV'  Cross Model Validation (inner criterion RSS)
%
%	  The forth argument, how, choses between 
%
%	    'AS'   All Subsets
%	    'FI'   Forward Inclusion
%	    'BE'   Backward Elimination
%
%	  The fifth argument, pmax, limits the number of included
%	  parameters. The returned Q is the criterion as a function of 
%	  the number of parameters; it might be interpreted as an 
%	  estimate of the prediction standard deviation. For the method
%	  'HT' the reported Q is instead the successive p-values for
%	  inclusion or elimination.
%
%	  The last column of the prediction matrix x must be an intercept 
%	  column, ie all elements are ones. This column is never excluded 
%	  in the search for a good model. If it is not present it is added.  
%	  The output I contains the index numbers of the included columns.
%	  For the method 'HT' the optional input argument "level" is the 
%	  p-value reference used for inclusion or deletion. Output B is
%	  the vector of coefficients, ie the suggested model is 
%	  Y = X*B. Column p of BB is the best B of parameter size p. 
%
%	  This function is not highly optimized for speed but rather for
%	  flexibility. It would be faster if 'all subsets' were in a 
%	  separate routine and 'forward' and 'backward' were in another
%	  routine, especially for CMV.
%
%         See also LSFIT and LINREG.

%       Anders Holtsberg, 14-12-94
%       Copyright (c) Anders Holtsberg

n = length(y);
nc = size(x,2);

if nargin<5 
   pmax = nc;
elseif isempty(pmax)
   pmax = nc;
end
if nargin<4
   how = 'FI';
   fprintf('   Default is forward selection\n');
end
if nargin<3
   crit = 'CMV';
   fprintf('   Default criterion is CMV\n');
end
if strcmp(how,'BE')
   pmax = nc;
end
if nargin<6 & strcmp(crit,'HT') 
   level = 0.05;
   fprintf('   Default level is 0.05\n');
end

if any(x(:,nc)~=1)
   fprintf('   An intercept column added')
   x = [x ones(n,1)];
   nc = nc + 1;
   pmax = pmax + 1;
end
if nc<2, disp('only one model'), return, end 

if ~(strcmp(crit,'HT')|strcmp(crit,'AIC')|...
     strcmp(crit,'BIC')|strcmp(crit,'CMV'))
  error('Third argument error');
end
if ~(strcmp(how,'BE')|strcmp(how,'FI')|strcmp(how,'AS'))
  error('Forth argument error');
end
      
Qsml = NaN*ones(pmax,1);

XX = x'*x;
XY = x'*y; 
YY = y'*y;  

% === If all subsets then set up an all-subsets-indicator-matrix ====

if strcmp(how,'AS')
   C = [];
   for i = 1:nc-1
      d = max(1,size(C,1));
      C = [zeros(d,1) C; ones(d,1) C];
      H = C * ones(size(C,2),1);
      J = find(H<pmax);
      C = C(J,:);
   end
   H = H(J);
   [S,I] = sort(H);
   C = C(I,:);
   C = [C ones(size(C,1),1)];
   AllSubsets = C;
   AllSubsetsH = sum(C')';
end

% === This is for CMV ===============================================

if strcmp(crit,'CMV')
   dataloopend = n + 1;
   Qcmv = zeros(pmax,1);
   XXs = XX;
   XYs = XY;
   YYs = YY;
else
   dataloopend = 1;
end

for idata = 1:dataloopend
   
   if strcmp(crit,'CMV')
      fprintf('\n %3.0f:', idata*(idata ~= dataloopend))
      if idata == dataloopend
         XX = XXs;
         XY = XYs;
         YY = YYs;
      else
         xi = x(idata,:);
         yi = y(idata);
         XX = XXs - xi'*xi;
         XY = XYs - xi'*yi;
         YY = YYs - yi^2; 
      end
   end

% === Now we begin to loop over model sizes =========================

   if strcmp(how,'BE')
      p = nc;
      loopto = 1;
      C = ones(1,nc);
   else
      p = 1;
      loopto = pmax;
      C = [zeros(1,nc-1) 1];
   end
   BB = zeros(nc,pmax);

   fprintf('  ');

   while 1;

% === The whole loop over the models of size p  ======================
     
      fprintf('%2.0f ', p)
      qmin = 1e99;
      for k = 1:size(C,1)
         Jk = find(C(k,:));
         q = YY - XY(Jk)'*(XX(Jk,Jk)\XY(Jk));               
         if q < qmin
            qmin = q;
            Cbest = C(k,:);
         end
      end
      Qsml(p) = qmin;
      I = find(Cbest);
      BB(I,p)  = XX(I,I)\XY(I);
   
% === And a piece for CMV only ======================================

      if strcmp(crit,'CMV') & idata < n + 1
         Jk = find(Cbest);
         q = yi - xi(Jk)*(XX(Jk,Jk)\XY(Jk));               
         Qcmv(p) = Qcmv(p) + q^2;
      end

% === Next parameter size ===========================================
   
      if p == loopto, break, end

      if strcmp(how,'FI')
         p = p + 1;
         C = [];
         for i = 1:nc-1
            if Cbest(i)==0
               Cnew = Cbest;
               Cnew(i) = 1;
               C = [C; Cnew];
            end
         end

      elseif strcmp(how,'BE')
         p = p - 1;
         C = [];
         for i = 1:nc-1
            if Cbest(i)==1
               Cnew = Cbest;
               Cnew(i) = 0;
               C = [C; Cnew];
            end
         end
   
      elseif strcmp(how,'AS')
         p = p + 1;
         C = AllSubsets(find(AllSubsetsH==p),:);
      end
   
   end
end

% === Finished at last ===============================================

if strcmp(crit,'CMV') 
   Q = sqrt(Qcmv/n);
end
if strcmp(crit,'AIC') 
   Q = sqrt(Qsml/n).*exp((1:pmax)'/n);
end;
if strcmp(crit,'BIC')
   Q = sqrt(Qsml/n).*exp((1:pmax)'/n*log(n)/2);
end

if strcmp(crit,'HT')
   Qsml = sqrt(Qsml/n);
   Fvals = (Qsml(1:pmax-1).^2 - Qsml(2:pmax).^2) ...
            ./ (Qsml(2:pmax).^2 ./ (n-(2:pmax))' );
   Q = 1-pf(Fvals,1,(n-(2:pmax))');
   i = find(Q>level);
   if strcmp(how,'BE')
      i = [1; Q<level];
      i = find(i);
      i = i(length(i));
   else
      i = [Q>level; 1];
      i = find(i);
      i = i(1);
   end
else
   [S,i] = sort(Q);
   i = i(1);
end

B = BB(:,i);
I = find(B);

fprintf('\n');

