Home > wafo > onedim > findoutliers.m

findoutliers

PURPOSE ^

Finds the indices to spurious points in a timeseries

SYNOPSIS ^

[ind, indg]=findoutliers(xx,zcrit,dcrit,ddcrit,plotflag)

DESCRIPTION ^

  FINDOUTLIERS Finds the indices to spurious points in a timeseries
 
  CALL:  [inds, indg] = findoutliers(xn,zcrit,dcrit,ddcrit,plotflag);
 
      inds = indices to spurious points.
      indg = indices to the rest of the points.
  
       xn  = two column data matrix with sampled times and values.
 
     zcrit = critical distance between consecutive points.  
                  (Default=0)
     dcrit = critical distance of Dx used for determination of spurious
             points.  (Default=1.5 standard deviation of xn)            
    ddcrit = critical distance of DDx used for determination of spurious
             points.  (Default=1.5 standard deviation of xn)
  plotflag = 0 no plotting (default)
             1 plot the result
 
   Consecutive points no more than zcrit apart are considered spurious.
   The points immediately before and after them are also removed. Jumps greater than
   dcrit in Dxn and greater than ddcrit in D^2xn are also considered spurious.
   (All distances are to be interpreted in the vertical direction.)  
   Other good choices for dcrit and ddcrit are:
   
         dcrit = 5*dT  and ddcrit = 9.81/2*dT^2
 
  where dT is the timestep between points.
 
  See also  waveplot, reconstruct

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function [ind, indg]=findoutliers(xx,zcrit,dcrit,ddcrit,plotflag) 
% FINDOUTLIERS Finds the indices to spurious points in a timeseries
%
% CALL:  [inds, indg] = findoutliers(xn,zcrit,dcrit,ddcrit,plotflag);
%
%     inds = indices to spurious points (sorted, without duplicates).
%     indg = indices to the rest of the points (only computed when two
%            outputs are requested or plotflag is set).
% 
%      xn  = vector of sampled values, or two column data matrix with
%            sampled times and values.  Row-oriented input (1xN or 2xN)
%            is transposed internally.
%
%    zcrit = critical distance between consecutive points.  
%            A negative value disables this check. (Default=0)
%    dcrit = critical distance of Dx used for determination of spurious
%            points.  (Default=1.5 standard deviation of the non-missing
%            values of xn)            
%   ddcrit = critical distance of DDx used for determination of spurious
%            points.  (Default=1.5 standard deviation of the non-missing
%            values of xn)
% plotflag = 0 no plotting (default)
%            1 plot the result
%
%  Consecutive points no more than zcrit apart are considered spurious.
%  The points immediately before and after them are also removed. Jumps
%  greater than dcrit in Dxn and greater than ddcrit in D^2xn are also
%  considered spurious.
%  (All distances are to be interpreted in the vertical direction.)  
%  Other good choices for dcrit and ddcrit are:
%  
%        dcrit = 5*dT  and ddcrit = 9.81/2*dT^2
%
% where dT is the timestep between points.
%
% See also  waveplot, reconstruct

% Tested on: Matlab 5.3, 5.2 , 5.1
% History:           
% last modified by Per A. Brodtkorb 
% 29.03.99, new input arguments
% 01.10.98 checks  input and accepts missing values NaN's
% 25.09.98, 13.08-98

% NOTE: '|' (not '||') is kept in the scalar conditions below so the file
% still runs on the old Matlab versions listed above; inside an if
% condition it short-circuits, so the undefined argument is never touched.
if nargin <5|isempty(plotflag)
  plotflag=0;
end

% Internal switches selecting which detectors are active.
findjumpsDx=1;  % find jumps in Dx
%      two point spikes and spikes dcrit above/under the
%      previous and the following point are spurious.
findSpikes=0;   % find spikes
findDspikes=0;  % find double (two point) spikes
findjumpsD2x=1; % find jumps in D^2x
findNaN=1;      % find missing values

[n, m]= size(xx);
if n<m
  % Row-oriented input: transpose xx itself (not just a working copy) so
  % that the row indices computed below are also valid when xx is indexed
  % for plotting at the end of the function.
  xx=xx.';
  [n, m]= size(xx);
end
xn=xx;

if n<2, 
  error('The vector must have at least 2 elements!')
end

switch m
 case 1, % OK dimension
 case 2, xn=xn(:,2); % keep only the sampled values, drop the time column
 otherwise, 
   error('Wrong dimension of input! dim must be 2xN, 1xN, Nx2 or Nx1 ')       
end

ind=[];indg=[];
indmiss=isnan(xn); % logical mask of missing values

if nargin<2|isempty(zcrit),
  zcrit=0;
end
if nargin<3|isempty(dcrit),
  dcrit=1.5*std(xn(~indmiss));
  disp(['dcrit is set to ' num2str(dcrit)])
end
if nargin<4|isempty(ddcrit),
  ddcrit=1.5*std(xn(~indmiss));
  disp(['ddcrit is set to ' num2str(ddcrit)])
end
if findNaN, 
  ind=find(indmiss);
  disp(['Found ' num2str(length(ind)) ' missing points'])
  xn(indmiss)=0;%set NaN's to zero so the differences below stay finite
end

dxn=diff(xn);   % first difference,  dxn(k)  = xn(k+1)-xn(k)
ddxn=diff(dxn); % second difference, ddxn(k) = dxn(k+1)-dxn(k)

if  findSpikes , % finding spurious spikes
  % (a>dcrit).*b < -dcrit is true only where a>dcrit AND b<-dcrit,
  % i.e. a large step up immediately followed by a large step down
  % (and the mirrored case on the second line).
  tmp=find(((dxn(1:(end-1))>dcrit).*(dxn(2:end))<-dcrit) |...
      ((dxn(1:(end-1))<-dcrit).*(dxn(2:end))>dcrit) )+1;
  disp(['Found ' num2str(length(tmp)) ' spurious spikes'])
  ind=[ind;tmp];
end

if findDspikes ,% finding spurious double (two point) spikes  
  tmp= find(((dxn(1:(end-2))>dcrit).*(dxn(3:end))<-dcrit) |...
      ((dxn(1:(end-2))<-dcrit).*(dxn(3:end))>dcrit) )+1;
  disp(['Found ' num2str(length(tmp)) ' spurious two point (double) spikes'])
  ind=[ind;tmp;tmp+1];%removing both points
end

if findjumpsDx ,% finding spurious jumps  in Dx
  % finding spurious positive jumps  
  tmp= find(dxn>dcrit);
  disp(['Found ' num2str(length(tmp)) ' spurious positive jumps of Dx'])
  ind=[ind;tmp+1]; %removing the point after the jump 

  % finding spurious negative jumps  
  tmp= find(dxn<-dcrit);
  disp(['Found ' num2str(length(tmp)) ' spurious negative jumps of Dx'])
  ind=[ind;tmp]; %removing the point before the jump
end

if findjumpsD2x ,% finding spurious jumps in D^2x  
  % ddxn(k) is centred on sample k+1, hence the +1 offset.
  % finding spurious positive jumps  
  tmp= find(ddxn>ddcrit)+1;
  disp(['Found ' num2str(length(tmp)) ' spurious positive jumps of D^2x'])
  ind=[ind;tmp]; %removing the jump

  % finding spurious negative jumps  
  tmp= find(ddxn<-ddcrit)+1;
  disp(['Found ' num2str(length(tmp)) ' spurious negative jumps of D^2x'])
  ind=[ind;tmp]; %removing the jump
end

if zcrit>=0
  % finding consecutive values no more than zcrit apart.
  indzeros=(abs(dxn)<=zcrit);
  indz=find(indzeros)+1;
  if zcrit==0,
    disp(['Found ' num2str(length(indz)) ' consecutive equal values'])
  else
    disp(['Found ' num2str(length(indz)) ' consecutive values'])
    disp(['less than ' num2str(zcrit) ' apart'])
  end
  
  %finding the beginning and end of each run of consecutive equal values
  indtr=find((diff(indzeros)))+1;

  % removing the point before + all equal points + the point after  
  ind=[ind;(indtr(:)-1);indz;indtr(:);(indtr(:)+1)];
end

if length(ind)>1,
  ind=sort(ind);
  ind(diff(ind)==0)=[];%removing indices to points identified several times
end

if ~isempty(ind),
  disp(['Found the total of ' num2str(length(ind)) ' spurious points'])
end

if (nargout==2)|plotflag
  % everything that was not flagged is a good point
  indg=(1:n)';
  indg(ind)=[];
end

if plotflag,
  % xx is column-oriented here, so row indexing is valid for any input shape
  waveplot(xx(indg,:),xx(ind,:),20)
end

Mathematical Statistics
Centre for Mathematical Sciences
Lund University with Lund Institute of Technology

Comments or corrections to the WAFO group


Generated on Thu 06-Oct-2005 02:21:16 for WAFO by m2html © 2003