Home > wafo > onedim > findoutliers.m

# findoutliers

## PURPOSE

Finds the indices to spurious points in a timeseries

## SYNOPSIS

[ind, indg]=findoutliers(xx,zcrit,dcrit,ddcrit,plotflag)

## DESCRIPTION

FINDOUTLIERS Finds the indices to spurious points in a timeseries

CALL:  [inds, indg] = findoutliers(xn,zcrit,dcrit,ddcrit,plotflag);

inds = indices to spurious points.
indg = indices to the rest of the points.

xn  = two column data matrix with sampled times and values.

zcrit = critical distance between consecutive points.
(Default=0)
dcrit = critical distance of Dx used for determination of spurious
points.  (Default = 1.5 times the standard deviation of xn)
ddcrit = critical distance of DDx used for determination of spurious
points.  (Default = 1.5 times the standard deviation of xn)
plotflag = 0 no plotting (default)
1 plot the result

Consecutive points less than zcrit apart are considered spurious.
The points immediately before and after them are also removed. Jumps greater than
dcrit in Dxn and greater than ddcrit in D^2xn are also considered spurious.
(All distances are to be interpreted in the vertical direction.)
Another good choice for dcrit and ddcrit is:

dcrit = 5*dT  and ddcrit = 9.81/2*dT^2

where dT is the timestep between points.

## CROSS-REFERENCE INFORMATION

This function calls:
 - waveplot: Plots the surface elevation of timeseries.
 - error: Display message and abort function.
 - num2str: Convert number to string. (Fast version)
 - std: Standard deviation.
This function is called by:
 - Chapter2: % CHAPTER2 Modelling random loads and stochastic waves
 - recinit: setup all global variables of the RECDEMO

## SOURCE CODE

function [ind, indg]=findoutliers(xx,zcrit,dcrit,ddcrit,plotflag)
% FINDOUTLIERS Finds the indices to spurious points in a timeseries
%
% CALL:  [inds, indg] = findoutliers(xn,zcrit,dcrit,ddcrit,plotflag);
%
%     inds = indices to spurious points (sorted, without duplicates).
%     indg = indices to the rest of the points. Only computed when it is
%            requested as second output or when plotflag is set;
%            otherwise it is returned empty.
%
%      xn  = two column data matrix with sampled times and values.
%            Nx1, 1xN, Nx2 and 2xN inputs are accepted; for two column
%            input only the second column (the values) is examined.
%
%    zcrit = critical distance between consecutive points.
%                 (Default=0)
%    dcrit = critical distance of Dx used for determination of spurious
%            points.  (Default=1.5 times the standard deviation of xn)
%   ddcrit = critical distance of DDx used for determination of spurious
%            points.  (Default=1.5 times the standard deviation of xn)
% plotflag = 0 no plotting (default)
%            1 plot the result
%
%  Consecutive points less than zcrit apart are considered spurious.
%  The points immediately before and after them are also removed. Jumps
%  greater than dcrit in Dxn and greater than ddcrit in D^2xn are also
%  considered spurious. Missing values (NaN) are always reported as spurious.
%  (All distances are to be interpreted in the vertical direction.)
%  A negative zcrit disables the test for consecutive (nearly) equal values.
%  Another good choice for dcrit and ddcrit is:
%
%        dcrit = 5*dT  and ddcrit = 9.81/2*dT^2
%
% where dT is the timestep between points.
%

% Tested on: Matlab 5.3, 5.2 , 5.1
% History:
% 29.03.99, new input arguments
% 01.10.98 checks  input and accepts missing values NaN's
% 25.09.98, 13.08-98


if nargin <5|isempty(plotflag)
  plotflag=0;
end

% Switches selecting which outlier tests are run
findjumpsDx=1;  % find jumps in Dx
%      two point spikes and spikes dcrit above/under the
%      previous and the following point are spurious.
findSpikes=0;   % find spikes
findDspikes=0;  % find double (two point) spikes
findjumpsD2x=1; % find jumps in D^2x
findNaN=1;      % find missing values

[n, m]= size(xx);
if n<m
  % Row oriented input: orient xx itself (not only a working copy) so that
  % the row indices found below are also valid for xx when plotting.
  xx=xx.';
  [n, m]= size(xx);
end

if n<2,
  error('The vector must have at least 2 elements!')
end

switch m
 case 1, xn=xx;       % values only
 case 2, xn=xx(:,2);  % second column holds the values
 otherwise,
   error('Wrong dimension of input! dim must be 2xN, 1xN, Nx2 or Nx1 ')
end

ind=[];indg=[];
indmiss=isnan(xn);

% Defaults (the standard deviation is computed without the missing values)
if nargin<2|isempty(zcrit),
  zcrit=0;
end
if nargin<3|isempty(dcrit),
  dcrit=1.5*std(xn(~indmiss));
  disp(['dcrit is set to ' num2str(dcrit)])
end
if nargin<4|isempty(ddcrit),
  ddcrit=1.5*std(xn(~indmiss));
  disp(['ddcrit is set to ' num2str(ddcrit)])
end
if findNaN,
  ind=find(indmiss);
  disp(['Found ' num2str(length(ind)) ' missing points'])
  xn(indmiss)=0;%set NaN's to zero so the differences below are finite
end

dxn=diff(xn);   % first difference,  dxn(k)  = xn(k+1)-xn(k)
ddxn=diff(dxn); % second difference, ddxn(k) = dxn(k+1)-dxn(k)

if  findSpikes , % finding spurious spikes
  % a jump larger than dcrit immediately followed by a jump larger than
  % dcrit in the opposite direction
  tmp=find((dxn(1:(end-1))>dcrit & dxn(2:end)<-dcrit) |...
      (dxn(1:(end-1))<-dcrit & dxn(2:end)>dcrit) )+1;
  disp(['Found ' num2str(length(tmp)) ' spurious spikes'])
  ind=[ind;tmp];
end

if findDspikes ,% finding spurious double (two point) spikes
  % as above, but with one difference in between the two opposite jumps
  tmp= find((dxn(1:(end-2))>dcrit & dxn(3:end)<-dcrit) |...
      (dxn(1:(end-2))<-dcrit & dxn(3:end)>dcrit) )+1;
  disp(['Found ' num2str(length(tmp)) ' spurious two point (double) spikes'])
  ind=[ind;tmp;tmp+1];%removing both points
end

if findjumpsDx ,% finding spurious jumps  in Dx
  % finding spurious positive jumps
  tmp= find(dxn>dcrit);
  disp(['Found ' num2str(length(tmp)) ' spurious positive jumps of Dx'])
  ind=[ind;tmp+1]; %removing the point after the jump

  % finding spurious negative jumps
  tmp= find(dxn<-dcrit);
  disp(['Found ' num2str(length(tmp)) ' spurious negative jumps of Dx'])
  ind=[ind;tmp]; %removing the point before the jump
end

if findjumpsD2x ,% finding spurious jumps in D^2x
  % ddxn(k) is centered on point k+1, hence the +1
  % finding spurious positive jumps
  tmp= find(ddxn>ddcrit)+1;
  disp(['Found ' num2str(length(tmp)) ' spurious positive jumps of D^2x'])
  ind=[ind;tmp]; %removing the jump

  % finding spurious negative jumps
  tmp= find(ddxn<-ddcrit)+1;
  disp(['Found ' num2str(length(tmp)) ' spurious negative jumps of D^2x'])
  ind=[ind;tmp]; %removing the jump
end

if zcrit>=0
  % finding consecutive values less than zcrit apart.
  indzeros=(abs(dxn)<=zcrit);
  indz=find(indzeros)+1;
  if zcrit==0,
    disp(['Found ' num2str(length(indz)) ' consecutive equal values'])
  else
    disp(['Found ' num2str(length(indz)) ' consecutive values'])
    disp(['less than ' num2str(zcrit) ' apart'])
  end

  %finding the beginning and end of consecutive equal values
  indtr=find((diff(indzeros)))+1;

  % removing the point before + all equal points + the point after
  ind=[ind;(indtr(:)-1);indz;indtr(:);(indtr(:)+1)];
end

if length(ind)>1,
  % sort and remove indices to points identified several times
  ind=unique(ind);
end

if ~isempty(ind),
  disp(['Found the total of ' num2str(length(ind)) ' spurious points'])
end

if (nargout==2)|plotflag
  indg=(1:n)';
  indg(ind)=[];
end

if plotflag,
  % xx is column oriented here, so row indexing with ind/indg is valid
  waveplot(xx(indg,:),xx(ind,:),20)
end

Mathematical Statistics
Centre for Mathematical Sciences
Lund University with Lund Institute of Technology

Comments or corrections to the WAFO group

Generated on Thu 06-Oct-2005 02:21:16 for WAFO by m2html © 2003