Home > wafo > kdetools > gridcount.m

gridcount

PURPOSE ^

D-dimensional histogram using linear binning.

SYNOPSIS ^

c = gridcount(data,X)

DESCRIPTION ^

 GRIDCOUNT D-dimensional histogram using linear binning. 
  
  CALL  c = gridcount(data,X) 
  
  c    = gridcount, size N x 1           if D = 1  
                    size N x N x ... x N if D > 1   
  data = row vectors with D-dimensional data, size Nd x D 
  X    = column vectors defining discretization, size N x D 
         Must include the range of the data. 
  GRIDCOUNT obtains the grid counts using linear binning. 
  There are 2 strategies: simple- or linear- binning. 
  Suppose that an observation occurs at x and that the nearest point 
  below and above is y and z, respectively. Then simple binning strategy 
  assigns a unit weight to either y or z, whichever is closer. Linear 
  binning, on the other hand, assigns the grid point at y with the weight 
  of (z-x)/(z-y) and the gridpoint at z a weight of (y-x)/(z-y). 
    
  In terms of approximation error of using gridcounts as pdf-estimate, 
  linear binning is significantly more accurate than simple binning.   
  
  NOTE: The interval [min(X);max(X)] must include the range of the data. 
        The order of C is permuted in the same order as  
        meshgrid for D==2 or D==3.   
     
  Example 
   N     = 500; 
   data  = wraylrnd(1,N,1); 
   x = linspace(0,max(data)+1,50).';   
   dx = x(2)-x(1);   
   c = gridcount(data,x); 
   plot(x,c,'.')   % 1D histogram 
   plot(x,c/dx/N)  % 1D probability density plot 
   trapz(x,c/dx/N)    
  
  See also  bincount, kdebin

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

001 function c = gridcount(data,X) 
002 %GRIDCOUNT D-dimensional histogram using linear binning. 
003 % 
004 % CALL  c = gridcount(data,X) 
005 % 
006 % c    = gridcount, size N x 1           if D = 1  
007 %                   size N x N x ... x N if D > 1   
008 % data = row vectors with D-dimensional data, size Nd x D 
009 % X    = column vectors defining discretization, size N x D 
010 %        Must include the range of the data. 
011 % GRIDCOUNT obtains the grid counts using linear binning. 
012 % There are 2 strategies: simple- or linear- binning. 
013 % Suppose that an observation occurs at x and that the nearest point 
014 % below and above is y and z, respectively. Then simple binning strategy 
015 % assigns a unit weight to either y or z, whichever is closer. Linear 
016 % binning, on the other hand, assigns the grid point at y with the weight 
017 % of (z-x)/(z-y) and the gridpoint at z a weight of (y-x)/(z-y). 
018 %   
019 % In terms of approximation error of using gridcounts as pdf-estimate, 
020 % linear binning is significantly more accurate than simple binning.   
021 % 
022 % NOTE: The interval [min(X);max(X)] must include the range of the data. 
023 %       The order of C is permuted in the same order as  
024 %       meshgrid for D==2 or D==3.   
025 %    
026 % Example 
027 %  N     = 500; 
028 %  data  = wraylrnd(1,N,1); 
029 %  x = linspace(0,max(data)+1,50).';   
030 %  dx = x(2)-x(1);   
031 %  c = gridcount(data,x); 
032 %  plot(x,c,'.')   % 1D histogram 
033 %  plot(x,c/dx/N)  % 1D probability density plot 
034 %  trapz(x,c/dx/N)    
035 % 
036 % See also  bincount, kdebin 
037    
038 %Reference 
039 %  Wand,M.P. and Jones, M.C. (1995)  
040 % 'Kernel smoothing' 
041 %  Chapman and Hall, pp 182-192   
042    
043 %  History 
044 % revised pab Feb2005 
045 % -fixed a bug for d>2;   
046 % by pab Dec2003   
047    
048 error(nargchk(2,2,nargin)) 
049  
050 [n,d]    = size(data);   
051 [inc,d1] = size(X); 
052  
053 if d~=d1 
054   error('Dimension 2 of data and X do not match.') 
055 end 
056  
057 dx  = diff(X(1:2,:),1); 
058 xlo = X(1,  :); 
059 xup = X(end,:); 
060  
061 if any(min(data,[],1)<xlo) | any(xup<max(data,[],1)) 
062   error('X does not include whole range of the data!') 
063 end 
064  
065 if d>1 
066   csiz = inc(:,ones(1,d)); 
067   n1   = n; 
068 else 
069   csiz = [inc,1]; 
070   n1   = 1; 
071 end 
072    
073 binx = floor((data-xlo(ones(n1,1),:))./dx(ones(n1,1),:))+1; 
074 w    = prod(dx); 
075 if  d==1, 
076   try, 
077     % binc is much faster than sparse for 1D data 
078     % However, for N-Dimensional data sparse is sometimes faster. 
079     c = zeros(csiz); 
080     [len,bin,val] = bincount(binx,(X(binx+1)-data)); 
081     c(bin) = val; 
082     [len,bin,val] = bincount(binx+1,(data-X(binx))); 
083     c(bin) = c(bin)+val; 
084     c = c/w; 
085   catch 
086     c = full(sparse(binx,1,(X(binx+1)-data),inc,1)+... 
087          sparse(binx+1,1,(data-X(binx)),inc,1))/w; 
088   end 
089 elseif d==2 
090   b2 = binx(:,2); 
091   b1 = binx(:,1); 
092   c = full(sparse(b1,b2 ,abs(prod(([X(b1+1,1) X(b2+1,2)]-data),2)),inc,inc)); 
093   c = c + sparse(b1+1,b2  ,abs(prod(([X(b1,1) X(b2+1,2)]-data),2)),inc,inc); 
094   c = c + sparse(b1  ,b2+1,abs(prod(([X(b1+1,1) X(b2,2)]-data),2)),inc,inc); 
095   c = (c + sparse(b1+1,b2+1,abs(prod(([X(b1,1) X(b2,2)]-data),2)),inc,inc))/w; 
096 else % d>2 
097  useSparse = 1; 
098   Nc = prod(csiz); 
099   c  = zeros(Nc,1); 
100   
101   fact2 = [0 inc*(1:d-1)]; 
102   fact1 = [1 cumprod(csiz(1:d-1))]; 
103   fact1 = fact1(ones(n,1),:); 
104   for ir=0:2^(d-1)-1, 
105       bt0 = bitget(ir,1:d); 
106       bt1 = ~bt0; 
107        
108       % Convert to linear index (faster than sub2ind) 
109       b1  = sum((binx + bt0(ones(n,1),:)-1).*fact1,2)+1; %linear index to c 
110       bt2 = bt1 + fact2; 
111       b2  = binx + bt2(ones(n,1),:);                     % linear index to X 
112       if useSparse 
113         % Fast gridding using sparse 
114         c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1); 
115       else 
116         [len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2))); 
117         c(bin)        = c(bin)+val;        
118       end 
119        
120        % Convert to linear index (faster than sub2ind) 
121       b1  = sum((binx + bt1(ones(n,1),:)-1).*fact1,2)+1; % linear index to c 
122       bt2 = bt0 + fact2; 
123       b2  = binx + bt2(ones(n,1),:);                     % linear index to X 
124        
125       if useSparse 
126         % Fast gridding using sparse 
127         c = c + sparse(b1,1,abs(prod(X(b2)-data,2)),Nc,1); 
128       else 
129         [len,bin,val] = bincount(b1,abs(prod(X(b2)-data,2))); 
130         c(bin)        = c(bin)+val;    
131       end 
132     end 
133     c = reshape(c/w,csiz); 
134 end 
135 switch d % make sure c is stored in the same way as meshgrid 
136  case 2,  c = c.'; 
137  case 3,  c = permute(c,[2 1 3]); 
138 end 
139 return

Mathematical Statistics
Centre for Mathematical Sciences
Lund University with Lund Institute of Technology

Comments or corrections to the WAFO group


Generated on Thu 06-Oct-2005 02:21:16 for WAFO by m2html © 2003