Home > wafo > wsim > cov2csdat.m

cov2csdat

PURPOSE ^

generates conditionally simulated values

SYNOPSIS ^

[sampl,mu1o, mu1oStd, inds] = cov2csdat(xo,R,cases,method,inds)

DESCRIPTION ^

  COV2CSDAT generates conditionally simulated values 
  
  CALL:  [sample,mu1o, mu1oStd]  = cov2csdat(x,R,cases,method,inds) 
  
    sample = a random sample of the missing values conditioned on the 
             observed (known) data. 
      mu1o = expected values of the missing values conditioned on the 
             observed data. 
   mu1oStd = Standard deviation of mu1o. 
  
        x  = datavector including missing data.  
              (missing data must be NaN if inds is not given) 
        R  = Auto Covariance Function structure  
             (NB! must have the same spacing as x) 
     cases = number of cases, i.e., number of columns of sample (default=1) 
    method = 'approximate': (default) condition only on the closest 
                             points. Pros: quite fast 
             'pseudo': Uses the pseudo inverse to calculate conditional 
                       covariance matrix 
             'exact' : doing the exact simulation. 
                      Cons: Slow for large data sets, may not  
                            return any result due to the covariance  
                            matrix being singular or nearly singular. 
      inds = indices to spurious or missing data (see x) 
  
   COV2CSDAT generates the missing values from x conditioned on the observed 
   values assuming x comes from a multivariate Gaussian distribution 
   with zero expectation and Auto Covariance function R. 
  
  See also  wmnormrnd, cov2sdat

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function [sampl,mu1o, mu1oStd, inds]  = cov2csdat(xo,R,cases,method,inds) 
0002 % COV2CSDAT generates conditionally simulated values 
0003 % 
0004 % CALL:  [sample,mu1o, mu1oStd]  = cov2csdat(x,R,cases,method,inds) 
0005 % 
0006 %   sample = a random sample of the missing values conditioned on the 
0007 %            observed (known) data. 
0008 %     mu1o = expected values of the missing values conditioned on the 
0009 %            observed data. 
0010 %  mu1oStd = Standard deviation of mu1o. 
0011 % 
0012 %       x  = datavector including missing data.  
0013 %             (missing data must be NaN if inds is not given) 
0014 %       R  = Auto Covariance Function structure  
0015 %            (NB! must have the same spacing as x) 
0016 %    cases = number of cases, i.e., number of columns of sample (default=1) 
0017 %   method = 'approximate': (default) condition only on the closest 
0018 %                            points. Pros: quite fast 
0019 %            'pseudo': Uses the pseudo inverse to calculate conditional 
0020 %                      covariance matrix 
0021 %            'exact' : doing the exact simulation. 
0022 %                     Cons: Slow for large data sets, may not  
0023 %                           return any result due to the covariance  
0024 %                           matrix being singular or nearly singular. 
0025 %     inds = indices to spurious or missing data (see x) 
0026 % 
0027 %  COV2CSDAT generates the missing values from x conditioned on the observed 
0028 %  values assuming x comes from a multivariate Gaussian distribution 
0029 %  with zero expectation and Auto Covariance function R. 
0030 % 
0031 % See also  wmnormrnd, cov2sdat 
0032  
0033 %Tested on : matlab 5.3, 5.1 
0034 % History: 
0035 % revised jr 02.07.2000 
0036 %  - mnormrnd -> wmnormrnd 
0037 % revised pab 17.01.2000 
0038 %  - updated documentation 
0039 % revised pab 12.10.1999 
0040 %   
0041 % last modified by Per A. Brodtkorb 17.09.98,31.08.98,27.08.98,16.08.98 
0042  
0043 % secret methods: 
0044 %         'dec1-3': different decomposing algorithm's  
0045 %                   which is only correct for a variables 
0046 %                   having the Markov property  
0047 %                   Cons:  3 is not correct at all, but seems to give 
0048 %                          a reasonable result  
0049 %                   Pros: 1 is slow, 2 is quite fast and 3 is very fast 
0050 %                   Note: (mu1oStd is not given for method ='dec3') 
0051  
0052 x=xo(:); 
0053 N=length(x); 
0054 if isstruct(R) 
0055   ACF=R.R; 
0056 else 
0057   error('Covariance must be a struct') 
0058 end 
0059 [n m]=size(ACF); 
0060  
0061 if (n==m)&(m~=1),%No Auto Covariance Function is given 
0062   error('not an ACF') 
0063 end 
0064 ACF=ACF(:);n=max(n,m); 
0065 [y I]=max(ACF); 
0066 switch I, 
0067   case 1,% ACF starts with zero lag     
0068   otherwise,error('This is not a valid ACF!!') 
0069 end 
0070    
0071 if (nargin==5)&~isempty(inds), 
0072   x(inds)=NaN; 
0073 end 
0074 inds=isnan(x);%indices to the unknown observations 
0075 Ns=sum(inds);% # missing values 
0076 if Ns==0, 
0077  disp('Warning: No missing data, unable to continue.') 
0078  sampel=[]; 
0079  return 
0080 end 
0081 if nargin<3|isempty(cases), 
0082   cases=1; 
0083 end 
0084 if nargin<4|isempty(method), 
0085   method='approximate'; % default method 
0086 end 
0087  
0088 if Ns==N,% simulated surface from the apriori distribution 
0089   sampel=cov2sdat(R,[N cases]); 
0090   disp('All data missing,  returning sample from the unconditional distribution.') 
0091   return 
0092 end 
0093  
0094 %initializing variables 
0095 mu1o=zeros(Ns,1); 
0096 mu1oStd=mu1o; 
0097 sampel=zeros(Ns,cases); 
0098 if method(1)=='d', 
0099   xs=cov2sdat(R,[N cases]);% simulated surface from the apriori distribution 
0100   mu1os=sampel; 
0101 end 
0102  
0103  
0104 switch method(1:4), 
0105   case 'dec1' , % only correct for variables having the Markov property 
0106     % but still seems to give a reasonable answer. Slow procedure. 
0107     Sigma=sptoeplitz([ACF;zeros(N-n,1)]); 
0108   
0109     %Soo=Sigma(~inds,~inds); % covariance between known observations 
0110     %S11=Sigma(inds,inds); % covariance between unknown observations 
0111     %S1o=Sigma(inds,~inds);% covariance between known and unknown observations 
0112     %tmp=S1o*pinv(full(Soo));  
0113     %tmp=S1o/Soo; % this is time consuming if Soo large 
0114     tmp=2*Sigma(inds,~inds)/(Sigma(~inds,~inds) +Sigma(~inds,~inds)' ); 
0115      
0116     if nargout==3, %|nargout==0, 
0117       %standard deviation of the expected surface 
0118       %mu1oStd=sqrt(diag(S11-tmp*S1o')); 
0119       mu1oStd=sqrt(diag(Sigma(inds,inds)-tmp*Sigma(~inds,inds))); 
0120     end 
0121      
0122     %expected surface conditioned on the known observations from x 
0123     mu1o=tmp*(x(~inds)); 
0124     %expected surface conditioned on the known observations from xs 
0125     mu1os=tmp*(xs(~inds,:)); 
0126     % sampled surface conditioned on the known observations 
0127     sampel=mu1o(:,ones(1,cases))+xs(inds,:)-mu1os;  
0128      
0129   case 'dec2',% only correct for variables having the Markov property 
0130     % but still seems to give a reasonable answer 
0131     % approximating the expected surfaces conditioned on  
0132     % the known observations from x and xs by only using the closest points 
0133     Sigma=sptoeplitz([ACF;zeros(n,1)]); 
0134     n2=floor(n/2); 
0135     inds2=find(inds); 
0136     idx=(1:2*n)'+max(0,inds2(1)-n2);% indices to the points used 
0137     tmpinds=inds; % temporary storage of indices to missing points 
0138     tinds=tmpinds(idx);% indices to the points used 
0139     ns=sum(tinds); % number of missing data in the interval 
0140     nprev=0; % number of previously simulated points 
0141     xsinds=xs(inds,:); 
0142     while ns>0, 
0143       tmp=2*Sigma(tinds,~tinds)/(Sigma(~tinds,~tinds)+Sigma(~tinds,~tinds)'); 
0144       if nargout==3|nargout==0, 
0145     %standard deviation of the expected surface 
0146     %mu1oStd=sqrt(diag(S11-tmp*S1o')); 
0147     mu1oStd((nprev+1):(nprev+ns))=... 
0148         max(mu1oStd((nprev+1):(nprev+ns)), ... 
0149         sqrt(diag(Sigma(tinds,tinds)-tmp*Sigma(~tinds,tinds)))); 
0150       end 
0151        
0152       %expected surface conditioned on the closest known observations 
0153       % from x and xs2 
0154       mu1o((nprev+1):(nprev+ns))=tmp*(x(idx(~tinds))); 
0155       mu1os((nprev+1):(nprev+ns),:)=tmp*(xs(idx(~tinds),:));       
0156         
0157       if idx(end)==N,%  
0158     ns=0; % no more points to simulate 
0159       else 
0160     % updating by  putting expected surface into x      
0161     x(idx(tinds))=mu1o((nprev+1):(nprev+ns)); 
0162     xs(idx(tinds))=mu1os((nprev+1):(nprev+ns)); 
0163  
0164     nw=sum(tinds((end-n2):end));% # data which we want to simulate once  
0165     tmpinds(idx(1:(end-n2-1)))=0; % removing indices to data .. 
0166                                   % which has been simulated 
0167     nprev=nprev+ns-nw;% update # points simulated so far 
0168                        
0169     if (nw==0)&(nprev<Ns),  
0170       idx=(1:2*n)'+(inds2(nprev+1)-n2); % move to the next missing data 
0171     else 
0172       idx=idx+n; 
0173     end 
0174     tmp=N-idx(end); 
0175     if tmp<0, % checking if tmp exceeds the limits 
0176       idx=idx+tmp; 
0177     end 
0178     % find new interval with missing data 
0179     tinds=tmpinds(idx); 
0180     ns= sum(tinds);% # missing data 
0181       end   
0182     end 
0183     % sampled surface conditioned on the known observations 
0184     sampel=mu1o(:,ones(1,cases))+xsinds-mu1os;  
0185      
0186   case 'dec3', % this is not correct for even for variables having the  
0187     % Markov property but still seems to give a reasonable answer 
0188     % a quasi approach approximating the expected surfaces conditioned on  
0189     % the known observations from x and xs with a spline 
0190     inds2=find(inds);indg=find(~inds); 
0191     mu1o=interp1(indg, x(~inds),inds2,'spline'); 
0192     mu1os=interp1(indg, xs(~inds,:),inds2,'spline'); 
0193     % sampled surface conditioned on the known observations 
0194     sampel=mu1o(:,ones(1,cases))+xs(inds,:)-mu1os;  
0195  
0196   case {'exac','pseu'}, % exact but slow. It also may not return any result 
0197     Sigma=sptoeplitz([ACF;zeros(N-n,1)]); 
0198     %Soo=Sigma(~inds,~inds); % covariance between known observations 
0199     %S11=Sigma(inds,inds); % covariance between unknown observations 
0200     %S1o=Sigma(inds,~inds);% covariance between known and unknown observations 
0201     %tmp=S1o/Soo; % this is time consuming if Soo large 
0202     if method(1)=='e',%exact 
0203       tmp=2*Sigma(inds,~inds)/(Sigma(~inds,~inds)+Sigma(~inds,~inds)'); 
0204     else % approximate the inverse with pseudo inverse 
0205       tmp==Sigma(inds,~inds)*pinv(Sigma(~inds,~inds)); 
0206     end 
0207     %expected surface conditioned on the known observations from x 
0208     mu1o=tmp*(x(~inds)); 
0209     % Covariance conditioned on the known observations 
0210     Sigma1o=Sigma(inds,inds)-tmp*Sigma(~inds,inds); 
0211     %sampel conditioned on the known observations from x 
0212     sampel=wmnormrnd(mu1o,Sigma1o,cases ); 
0213      
0214     if nargout==3, %|nargout==0, 
0215       %standard deviation of the expected surface 
0216       mu1oStd=sqrt(diag(Sigma1o)); 
0217     end 
0218      case 'circ',% approximating by embedding an circulant matrix 
0219        % using that the inverse of the circulant covariance matrix has  
0220        % approximately the same bandstructure as the inverse of the 
0221        % covariance matrix 
0222        xx2=[x ;zeros(2*n,1);x(end-1:2)];    
0223        inds=inds(:); 
0224        inds2=[inds ;ones(2*n,1); inds(end-2:2)];  
0225        nfft=2^nextpow2(2*N+2*n-2); 
0226        acfC=[ACF(1:n);zeros(nfft-2*n+2,1);ACF((n-1):-1:2)];% circulant vector 
0227        % eigenvalues to the circulant covariance matrix 
0228        lambda=real(fft(acfC,nfft)); 
0229        % eigenvalues to the inverse circulant covariance matrix 
0230        lambdainv=1./lambda; 
0231        k1=find(isinf(lambdainv)) 
0232        if any(k1), 
0233      disp('Warning procedure is not accurate') 
0234      lambdainv(k)=0; 
0235        end 
0236       acfCinv=real(fft(lambdainv,nfft)/nfft)'; % circulant vector 
0237        k=find(abs(acfCinv/acfCinv(1))>0.2); 
0238        sigmainv = spdiags( acfCinv(ones(nfft,1),k), k-1, nfft, nfft) 
0239       %not finished 
0240   case 'appr',% approximating by only  condition on  
0241     % the closest points 
0242     % checking approximately how many lags we need in order to  
0243     % ensure conditional independence 
0244     % using that the inverse of the circulant covariance matrix has  
0245     % approximately the same bandstructure as the inverse of the 
0246     % covariance matrix 
0247     if 0, 
0248       nfft=2^nextpow2(2*N-2); 
0249       acfC=[ACF(1:n);zeros(nfft-2*n+2,1);ACF((n-1):-1:2)];% circulant vector 
0250       % eigenvalues to the circulant covariance matrix 
0251       lambda=real(fft(acfC,nfft)); 
0252       % eigenvalues to the inverse circulant covariance matrix 
0253       lambda=1./lambda; 
0254       lambda(isinf(lambda))=0; 
0255       acfC=real(fft(lambda,nfft)/nfft); % circulant vector 
0256       acfC=acfC/acfC(1); % relative significance of each lag 
0257       % minimum lag distance for which samples are conditionally  
0258       % independent, Nsig is set to 4 times this value 
0259       Nsig=find(abs(acfC(1:nfft/2))>0.02);        
0260       Nsig=min(max(2*n,4*Nsig(end)+2),N);% size of Sigma 
0261     else 
0262       Nsig=2*n; 
0263     end 
0264     Sigma=sptoeplitz([ACF;zeros(Nsig-n,1)]); 
0265     n2=floor(Nsig/4); 
0266     inds2=find(inds); 
0267     idx=(1:Nsig)'+max(0,inds2(1)-n2);% indices to the points used 
0268     tmpinds=inds; % temporary storage of indices to missing points 
0269     tinds=tmpinds(idx);% indices to the points used 
0270     ns=sum(tinds); % number of missing data in the interval 
0271     nprev=0; % number of previously simulated points 
0272     x2=x; 
0273      
0274     while ns>0, 
0275       %make sure MATLAB uses a symmetric matrix solver 
0276       tmp=2*Sigma(tinds,~tinds)/(Sigma(~tinds,~tinds)+Sigma(~tinds,~tinds)'); 
0277       Sigma1o=Sigma(tinds,tinds)-tmp*Sigma(~tinds,tinds); 
0278       if nargout==3|nargout==0, 
0279     %standard deviation of the expected surface 
0280     %mu1oStd=sqrt(diag(S11-tmp*S1o')); 
0281     mu1oStd((nprev+1):(nprev+ns))=... 
0282         max( mu1oStd((nprev+1):(nprev+ns)) , sqrt(diag(Sigma1o))); 
0283       end 
0284  
0285       %expected surface conditioned on the closest known observations from x 
0286       mu1o((nprev+1):(nprev+ns))=tmp*(x2(idx(~tinds))); 
0287       %sample conditioned on the known observations from x 
0288       sampel((nprev+1):(nprev+ns),:) =... 
0289       wmnormrnd(tmp*(x(idx(~tinds))),Sigma1o,cases );      
0290       if idx(end)==N,%  
0291     ns=0; % no more points to simulate 
0292       else 
0293     % updating 
0294     x2(idx(tinds))= mu1o((nprev+1):(nprev+ns)); %expected surface 
0295     x(idx(tinds))=sampel((nprev+1):(nprev+ns),1);%sampled surface  
0296     nw=sum(tinds((end-n2):end));% # data we want to simulate once more 
0297     tmpinds(idx(1:(end-n2-1)))=0; % removing indices to data .. 
0298                                   % which has been simulated 
0299     nprev=nprev+ns-nw;% update # points simulated so far 
0300      
0301     if (nw==0)&(nprev<Ns),  
0302       idx=(1:Nsig)'+(inds2(nprev+1)-n2); % move to the next missing data 
0303     else 
0304       idx=idx+n; 
0305     end 
0306     tmp=N-idx(end); 
0307     if tmp<0, % checking if tmp exceeds the limits 
0308       idx=idx+tmp; 
0309     end 
0310     % find new interval with missing data 
0311     tinds=tmpinds(idx); 
0312     ns= sum(tinds);% # missing data in the interval 
0313       end 
0314     end 
0315 end 
0316  
0317 %size(mu1oStd) 
0318 if nargout==0 
0319   plot(find(~inds),x(~inds),'.') 
0320   hold on, 
0321   ind=find(inds); 
0322   plot(ind,mu1o   ,'*') 
0323   plot(ind,sampel,'r+') 
0324   %mu1oStd 
0325   plot(ind,[mu1o-2*mu1oStd mu1o+2*mu1oStd ] ,'d') 
0326   %plot(xs),plot(ind,mu1os,'r*') 
0327   hold off 
0328   legend('observed values','mu1o','sampled values','2 stdev') 
0329   %axis([770 850 -1 1]) 
0330   %axis([1300 1325 -1 1]) 
0331 else 
0332   sampl=sampel; 
0333 end 
0334  
0335 function y=sptoeplitz(x) 
0336   k=find(x); 
0337   x=x(:)'; 
0338   n=length(x); 
0339   if length(k)>0.3*n, 
0340     y=toeplitz(x); 
0341   else 
0342     y = spdiags( x(ones(n,1),k), k-1, n, n); 
0343     k(k(1)==1)=[]; 
0344     y=y+spdiags( x(ones(n,1),k), -k+1, n, n); 
0345  end 
0346

Mathematical Statistics
Centre for Mathematical Sciences
Lund University with Lund Institute of Technology

Comments or corrections to the WAFO group


Generated on Thu 06-Oct-2005 02:21:16 for WAFO by m2html © 2003