varpro 0.14.0

A straightforward nonlinear least-squares fitting library which uses the Variable Projection algorithm.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
function [alpha, c, wresid, wresid_norm, y_est, Regression] = ...
          varpro(y, w, alpha, n, ada, lb, ub, options)
%VARPRO Solve a separable nonlinear least squares problem.
% [alpha, c, wresid, wresid_norm, y_est, Regression] =
%             VARPRO(y, w, alpha, n, ada, lb, ub, options)
%
% Given a set of m observations y(1),...,y(m)
% this program computes a weighted least squares fit using the model
%
%    eta(alpha,c,t) = 
%            c_1 * phi_1 (alpha,t) + ...  + c_n * phi_n (alpha,t) 
% (possibly with an extra term  + phi_{n+1} (alpha,t) ).
%
% This program determines optimal values of the q nonlinear parameters
% alpha and the n linear parameters c, given observations y at m
% different values of the "time" t and given evaluation of phi and 
% (optionally) derivatives of phi.
%
% On Input:
%
%   y    m x 1   vector containing the m observations.
%   w    m x 1   vector of weights used in the least squares
%                fit.  We minimize the norm of the weighted residual
%                vector r, where, for i=1:m,
%
%                r(i) = w(i) * (y(i) - eta(alpha, c, t(i,:))).
%
%                Therefore, w(i) should be set to 1 divided by
%                the standard deviation in the measurement y(i).  
%                If this number is unknown, set w(i) = 1.
%   alpha q x 1  initial estimates of the parameters alpha.
%                If alpha = [], Varpro assumes that the problem
%                is linear and returns estimates of the c parameters.
%   n            number of linear parameters c
%   ada          a function handle, described below.
%   lb    q x 1  lower bounds on the parameters alpha. 
%   (Optional)   (Omit this argument or use [] if there are
%                no lower bounds.)
%   ub    q x 1  upper bounds on the parameters alpha. 
%   (Optional)   (Omit this argument or use [] if there are
%                no upper bounds.)
%   options      The Matlab optimization parameter structure,
%   (Optional)   set by "optimset", to control convergence
%                tolerances, maximum number of function evaluations,
%                information displayed in the command window, etc. 
%                To use default options, omit this parameter.
%                To determine the default options, type
%                    options = optimset('lsqnonlin')
%                After doing this, the defaults can be modified;
%                to modify the display option, for example, type
%                    options = optimset('lsqnonlin');
%                    optimset(options,'Display','iter');
%
% On Output:
%
%  alpha  q x 1  estimates of the nonlinear parameters.
%  c      n x 1  estimates of the linear parameters.
%  wresid m x 1  weighted residual vector, with i-th component
%                w(i) * (y(i) - eta(alpha, c, t(i,:))).
%  wresid_norm   norm of wresid.
%  y_est  m x 1  the model estimates = eta(alpha, c, t(i,:))
%  Regression    a structure containing diagnostics about the model fit.
%                **************************************************
%                *                C a u t i o n:                  *
%                *   The theory that makes statistical            *
%                *   diagnostics useful is derived for            *
%                *   linear regression, with no upper- or         *
%                *   lower-bounds on variables.                   *
%                *   The relevance of these quantities to our     *
%                *   nonlinear model is determined by how well    *
%                *   the linearized model (Taylor series model)   *
%                *         eta(alpha_true, c_true)                *
%                *            +  Phi * (c  - c_true)              *
%                *            + dPhi * (alpha - alpha_true)       *
%                *   fits the data in the neighborhood of the     *
%                *   true values for alpha and c, where Phi       *
%                *   and dPhi contain the partial derivatives     *
%                *   of the model with respect to the c and       *
%                *   alpha parameters, respectively, and are      *
%                *   defined in ada.                              *
%                **************************************************
%  Regression.report:  
%                This structure includes information on the solution
%                process, including the number of iterations, 
%                termination criterion, and exitflag from lsqnonlin.
%                (Type 'help lsqnonlin' to see the exit conditions.)
%                Regression.report.rank is the computed rank of the 
%                matrix for the linear subproblem.  If this equals
%                n, then the linear coefficients are well-determined.
%                If it is less than n, then although the model might
%                fit the data well, other linear coefficients might
%                give just as good a fit.
%  Regression.sigma:        
%                The estimate of the standard deviation is the
%                weighted residual norm divided by the square root
%                of the number of degrees of freedom.
%                This is also called the "regression standard error"
%                or the square-root of the weighted SSR (sum squared
%                residual) divided by the square root of the
%                number of degrees of freedom.
%  Regression.RMS:
%                The "residual mean square" is equal to sigma^2:
%                RMS = wresid_norm^2 / (m-n-q)
%  Regression.coef_determ:
%                The "coefficient of determination" for the fit,
%                also called the square of the multiple correlation
%                coefficient, is sometimes called R^2.
%                It is computed as 1 - wresid_norm^2/CTSS,
%                where the "corrected total sum of squares"
%                CTSS is the norm-squared of W*(y-y_bar),
%                and the entries of y_bar are all equal to
%                (the sum of W_i^2 y_i) divided by (the sum of W_i^2).
%                A value of .95, for example, indicates that 95 per
%                cent of the CTSS is accounted for by the fit.
%
%  Regression.CovMx: (n+q) x (n+q)
%                This is the estimated variance/covariance matrix for
%                the parameters.  The linear parameters c are ordered
%                first, followed by the nonlinear parameters alpha.
%                This is empty if dPhi is not computed by ada.
%  Regression.CorMx: (n+q) x (n+q)
%                This is the estimated correlation matrix for the 
%                parameters.  The linear parameters c are ordered
%                first, followed by the nonlinear parameters alpha.
%                This is empty if dPhi is not computed by ada.
%  Regression.std_param: (n+q) x 1
%                This vector contains the estimate of the standard 
%                deviation for each parameter.
%                The k-th element is the square root of the k-th main 
%                diagonal element of Regression.CovMx
%                This is empty if dPhi is not computed by ada.
%  Regression.t_ratio:   (n+q) x 1
%                The t-ratio for each parameter is equal to the
%                parameter estimate divided by its standard deviation.
%                (linear parameters c first, followed by alpha)
%                This is empty if dPhi is not computed by ada.
%  Regression.standardized_wresid:
%                The k-th component of the "standardized weighted 
%                residual" is the k-th component of the weighted 
%                residual divided by its standard deviation.
%                This is empty if dPhi is not computed by ada.
%
%---------------------------------------------------------------
% Specification of the function ada, which computes information
% related to Phi:
%
%   function [Phi,dPhi,Ind] = ada(alpha)
%
%     This function computes Phi and, if possible, dPhi.
%
%     On Input: 
%
%        alpha q x 1    contains the current value of the alpha parameters.
%
%        Note:  If more input arguments are needed, use the standard
%               Matlab syntax to accomplish this.  For example, if
%               the input arguments to ada are t, z, and alpha, then
%               before calling varpro, initialize t and z, and in calling 
%               varpro, replace "@ada" by "@(alpha)ada(t,z,alpha)".
%
%     On Output:
%
%        Phi   m x n1   where Phi(i,j) = phi_j(alpha,t_i).
%                       (n1 = n if there is no extra term; 
%                        n1 = n+1 if an extra term is used)
%        dPhi  m x p    where the columns contain partial derivative
%                       information for Phi and p is the number of 
%                       columns in Ind 
%                       (or dPhi = [] if derivatives are not available).
%        Ind   2 x p    Column k of dPhi contains the partial
%                       derivative of Phi_j with respect to alpha_i, 
%                       evaluated at the current value of alpha, 
%                       where j = Ind(1,k) and i = Ind(2,k).
%                       Columns of dPhi that are always zero, independent
%                       of alpha, need not be stored. 
%        Example:  if  phi_1 is a function of alpha_2 and alpha_3, 
%                  and phi_2 is a function of alpha_1 and alpha_2, then 
%                  we can set
%                          Ind = [ 1 1 2 2
%                                  2 3 1 2 ]
%                  In this case, the p=4 columns of dPhi contain
%                          d phi_1 / d alpha_2,
%                          d phi_1 / d alpha_3,
%                          d phi_2 / d alpha_1,
%                          d phi_2 / d alpha_2,
%                  evaluated at each t_i.
%                  There are no restrictions on how the columns of
%                  dPhi are ordered, as long as Ind correctly specifies
%                  the ordering.
%
%        If derivatives dPhi are not available, then set dPhi = Ind = [].
%      
%---------------------------------------------------------------
%
%  Varpro calls lsqnonlin, which solves a constrained least squares
%  problem with upper and lower bounds on the constraints.  What
%  distinguishes varpro from lsqnonlin is that, for efficiency and
%  reliability, varpro causes lsqnonlin to only iterate on the
%  nonlinear parameters.  Given the information in Phi and dPhi, this 
%  requires an intricate but inexpensive computation of partial 
%  derivatives, and this is handled by the varpro function formJacobian.
%
%  lsqnonlin is in Matlab's Optimization Toolbox.  Another solver
%  can be substituted if the toolbox is not available.
%
%  Any parameters that require upper or lower bounds should be put in
%  alpha, not c, even if they appear linearly in the model.
%  
%  The original Fortran implementation of the variable projection 
%  algorithm (ref. 2) was modified in 1977 by John Bolstad 
%  Computer Science Department, Serra House, Stanford University,
%  using ideas of Linda Kaufman (ref. 5) to speed up the 
%  computation of derivatives.  He also allowed weights on
%  the observations, and computed the covariance matrix.
%  Our Matlab version takes advantage of 30 years of improvements
%  in programming languages and minimization algorithms.  
%  In this version, we also allow upper and lower bounds on the 
%  nonlinear parameters.
%
%  It is hoped that this implementation will be of use to Matlab
%  users, but also that its simplicity will make it easier for the
%  algorithm to be implemented in other languages.
%
%  This program is documented in
%  Dianne P. O'Leary and Bert W. Rust,
%  Variable Projection for Nonlinear Least Squares Problems,
%  US National Inst. of Standards and Technology, 2010.
%
%  Main reference:
%
%    0.  Gene H. Golub and V. Pereyra, 'Separable nonlinear least   
%        squares: the variable projection method and its applications,'
%        Inverse Problems 19, R1-R26 (2003).
%         
%  See also these papers, cited in John Bolstad's Fortran code:
%                                                                       
%    1.  Gene H. Golub and V. Pereyra, 'The differentiation of      
%        pseudo-inverses and nonlinear least squares problems whose 
%        variables separate,' SIAM J. Numer. Anal. 10, 413-432      
%         (1973).                                                    
%    2.  ------, same title, Stanford C.S. Report 72-261, Feb. 1972.
%    3.  Michael R. Osborne, 'Some aspects of non-linear least      
%        squares calculations,' in Lootsma, Ed., 'Numerical Methods 
%        for Non-Linear Optimization,' Academic Press, London, 1972.
%    4.  Fred Krogh, 'Efficient implementation of a variable projection
%        algorithm for nonlinear least squares problems,'           
%        Comm. ACM 17:3, 167-169 (March, 1974).                    
%    5.  Linda Kaufman, 'A variable projection method for solving  
%        separable nonlinear least squares problems', B.I.T. 15,   
%        49-57 (1975).                                             
%    6.  C. Lawson and R. Hanson, Solving Least Squares Problems,
%        Prentice-Hall, Englewood Cliffs, N. J., 1974.          
%
%  These books discuss the statistical background:
%
%    7.  David A. Belsley, Edwin Kuh, and Roy E. Welsch, Regression 
%        Diagnostics, John Wiley & Sons, New York, 1980, Chap. 2.
%    8.  G.A.F. Seber and C.J. Wild, Nonlinear Regression,
%        John Wiley & Sons, New York, 1989, Sec. 2.1, 5.1, and 5.2.
%
%  Dianne P. O'Leary, NIST and University of Maryland, February 2011.
%  Bert W. Rust,      NIST                             February 2011.
%  Comments updated 07-2011.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

global mydebug myneglect   % test neglect of extra term in Jacobian

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Initialization: Check input, set default parameters and options.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Special case for Octave: lsqnonlin is provided by the optim package.
is_octave = exist('OCTAVE_VERSION', 'builtin') > 0;
if (is_octave)
  pkg load optim;
end

[m,ell] = size(y);      % m = number of observations
[m1,ell1] = size(w);

% Scalar tests below use the short-circuit operators || and &&,
% the recommended form for scalar logic in if-statements.
if (m1 ~= m) || (ell > 1) || (ell1 > 1)
  error('y and w must be column vectors of the same length')
end

[q,ell] = size(alpha);   % q = number of nonlinear parameters

if (ell > 1)
  error('alpha must be a column vector containing initial guesses for nonlinear parameters')
end

% lb and ub are optional; [] (or omission) means "no bound".
% When supplied and nonempty they must be q x 1, matching alpha.
if (nargin < 6)
  lb = [];
else
  [q1,ell] = size(lb);
  if (q1 > 0) && (ell > 0)
    if (q1 ~= q) || (ell > 1)
      error('lb must be empty or a column vector of the same length as alpha')
    end
  end
end

if (nargin < 7)
  ub = [];
else
  [q1,ell] = size(ub);
  if (q1 > 0) && (ell > 0)
    if (q1 ~= q) || (ell > 1)
      error('ub must be empty or a column vector of the same length as alpha')
    end
  end
end

% Accept an omitted OR explicitly-empty options argument; previously
% passing [] for options failed at the options.Display access below.
if (nargin < 8) || isempty(options)
  options = optimset('lsqnonlin');
end

if (~strcmp(options.Display,'off'))
  disp(sprintf('\n-------------------'))
  disp(sprintf('VARPRO is beginning.'))
end

W = spdiags(w,0,m,m);  % Create an m x m diagonal matrix from the vector w

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Make the first call to ada and do some error checking.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

[Phi, dPhi, Ind] = feval(ada, alpha);

[m1,n1] = size(Phi);     % n1 = number of basis functions Phi.

[m2,n2] = size(dPhi);
[ell,n3] = size(Ind);

% Consistency checks on the user-supplied model information.
% (Scalar tests use the short-circuit operators || and &&.)
if (m1 ~= m2) && (m2 > 0)
  error('In user function ada: Phi and dPhi must have the same number of rows.')
end

if (n1 < n) || (n1 > n+1)
  error('In user function ada: The number of columns in Phi must be n or n+1.')
end

if (n2 > 0) && (ell ~= 2)
  error('In user function ada: Ind must have two rows.')
end

if (n2 > 0) && (n2 ~= n3)
  error('In user function ada: dPhi and Ind must have the same number of columns.')
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Solve the least squares problem using lsqnonlin or, if there
% are no nonlinear parameters, using the SVD procedure in formJacobian.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if (q > 0) % The problem is nonlinear.

  % Tell lsqnonlin to use the analytic Jacobian computed by f_lsq
  % whenever the user's ada supplies derivative information.
  if ~isempty(dPhi)
    options = optimset(options,'Jacobian','on');
  end

  % lsqnonlin iterates only over the q nonlinear parameters; f_lsq
  % solves the linear subproblem internally at each trial alpha.
  [alpha, wresid_norm2, wresid, exitflag,output] = ...
    lsqnonlin(@f_lsq, alpha, lb, ub, options);
  % Re-evaluate once at the final alpha.  Because f_lsq is a nested
  % function, this call also refreshes the shared workspace variable c
  % (the optimal linear parameters) used by the diagnostics and the
  % printout below; r is unused here.
  [r, Jacobian, Phi, dPhi, y_est, rank] = f_lsq(alpha);
  wresid_norm = sqrt(wresid_norm2);
  Regression.report = output;
  Regression.report.rank = rank;
  Regression.report.exitflag = exitflag;

else       % The problem is linear.

  if (~strcmp(options.Display,'off'))
    disp(sprintf('VARPRO problem is linear, since length(alpha)=0.'))
  end

  % With q = 0 the fit reduces to one linear least squares solve.
  [Jacobian, c, wresid, y_est, Regression.report.rank] =  ...
    formJacobian(alpha, Phi, dPhi);
  wresid_norm = norm(wresid);
  wresid_norm2 = wresid_norm * wresid_norm;

end % if q > 0

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Compute some statistical diagnostics for the solution.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Calculate sample variance,  the norm-squared of the residual
%    divided by the number of degrees of freedom, m - n - q.

sigma2 = wresid_norm2 / (m-n-q);

% Compute  Regression.sigma:        
%                square-root of weighted residual norm squared divided 
%                by number of degrees of freedom.

Regression.sigma = sqrt(sigma2);

% Compute  Regression.coef_determ:
%                The coefficient of determination for the fit,
%                also called the square of the multiple correlation
%                coefficient, or R^2.
%                It is computed as 1 - wresid_norm^2/CTSS,
%                where the "corrected total sum of squares"
%                CTSS is the norm-squared of W*(y-y_bar),
%                and the entries of y_bar are all equal to
%                (the sum of W_i y_i) divided by (the sum of W_i).
%                NOTE(review): the help text above defines y_bar with
%                weights W_i^2, but the code below uses W_i -- confirm
%                which weighting is intended.  (The variable CTTS is a
%                long-standing misspelling of CTSS, kept as-is.)

y_bar = sum(w.*y) / sum(w);
CTTS = norm(W * (y - y_bar)) ^2;
Regression.coef_determ = 1 - wresid_norm^2 / CTTS;

% Compute  Regression.RMS = sigma^2:
%                the weighted residual norm-squared divided by the
%                number of degrees of freedom:
%                RMS = wresid_norm^2 / (m-n-q)

Regression.RMS = sigma2;                        

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Compute some additional statistical diagnostics for the 
%  solution, if the user requested it.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if (nargout==6) 

  if (isempty(dPhi))

    % Without derivative information none of the covariance-based
    % diagnostics can be formed; return them empty as documented.
    Regression.CovMx = [];
    Regression.CorMx = [];
    Regression.std_param = [];
    Regression.t_ratio = [];
    Regression.standardized_wresid = [];

  else

    % Calculate the covariance matrix CovMx, which is sigma^2 times the
    % inverse of H'*H, where  
    %              H = W*[Phi,J] 
    % contains the partial derivatives of  wresid  with
    % respect to the parameters in alpha and c.

    % Assemble J (m x q): column i is d(Phi*c)/d(alpha_i), built by
    % scattering the packed derivative columns of dPhi using Ind.
    [xx,pp] = size(dPhi);
    J = zeros(m,q);
    for kk = 1:pp
      j = Ind(1,kk);
      i = Ind(2,kk);
      if (j > n)
        % Derivative of the extra (coefficient-free) term phi_{n+1}.
        J(:,i) = J(:,i) + dPhi(:,kk);
      else
        J(:,i) = J(:,i) + c(j) * dPhi(:,kk);
      end
    end
    [Qj,Rj,Pj] = qr(W*[Phi(:,1:n),J], 0);     % Uses compact pivoted QR.
    T2 = Rj \ (eye(size(Rj,1)));
    CovMx = sigma2 * T2 * T2';
    Regression.CovMx(Pj,Pj) = CovMx;  % Undo the pivoting permutation.

    % Compute  Regression.CorMx:        
    %                estimated correlation matrix (n+q) x (n+q) for the
    %                parameters.  The linear parameters are ordered
    %                first, followed by the nonlinear parameters.

    d = 1 ./ sqrt(diag(Regression.CovMx));
    D = spdiags(d,0,n+q,n+q);
    Regression.CorMx = D * Regression.CovMx * D;

    % Compute  Regression.std_param:
    %                The k-th element is the square root of the k-th main 
    %                diagonal element of CovMx. 

    Regression.std_param = sqrt(diag(Regression.CovMx));

    % Compute  Regression.t_ratio:
    %                parameter estimates divided by their standard deviations.

    alpha = reshape(alpha, q, 1);
    Regression.t_ratio = [c; alpha] .* d;

    % Compute  Regression.standardized_wresid:
    %                The k-th component is the k-th component of the
    %                weighted residual, divided by its standard deviation.
    %                Let X = W*[Phi,J], 
    %                    h(k) = k-th main diagonal element of covariance
    %                           matrix for wresid
    %                         = k-th main diagonal element of X*inv(X'*X)*X' 
    %                         = k-th main diagonal element of Qj*Qj'.
    %                Then the standard deviation is estimated by
    %                sigma*sqrt(1-h(k)).

    % h(k) = ||Qj(k,:)||^2, computed for all rows at once; this is the
    % exact vectorized equivalent of the former element-by-element loop
    % temp(k,1) = Qj(k,:) * Qj(k,:)', which grew temp inside the loop.
    temp = sum(abs(Qj).^2, 2);
    Regression.standardized_wresid = wresid ./(Regression.sigma*sqrt(1-temp));

  end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% End of statistical diagnostics computations.
% Print some final information if desired.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


if (~strcmp(options.Display,'off'))
  disp(sprintf(' '))
  disp(sprintf('VARPRO Results:'))
  % sprintf with a vector argument reapplies the format to each element,
  % so each of the following lines prints a whole parameter vector.
  disp(sprintf(' Linear Parameters:'))
  disp(sprintf(' %15.7e ',c))

  disp(sprintf(' Nonlinear Parameters:'))
  disp(sprintf(' %15.7e ',alpha))
  disp(sprintf(' '))

  disp(sprintf(' Norm-squared of weighted residual  = %15.7e',wresid_norm2))
  disp(sprintf(' Norm-squared of data vector        = %15.7e',norm(w.*y)^2))
  disp(sprintf(' Norm         of weighted residual  = %15.7e',wresid_norm))
  disp(sprintf(' Norm         of data vector        = %15.7e',norm(w.*y)))
  disp(sprintf(' Expected error of observations     = %15.7e',Regression.sigma))
  disp(sprintf(' Coefficient of determination       = %15.7e',Regression.coef_determ))
  disp(sprintf('VARPRO is finished.'))
  disp(sprintf('-------------------\n'))
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% The computation is now completed.  
%
% varpro uses the following two functions, f_lsq and formJacobian.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%--------------------------- Beginning of f_lsq --------------------------

  function [wr_trial, Jacobian, Phi_trial, dPhi_trial, y_est,myrank] = ...
      f_lsq(alpha_trial)

    % function [wr_trial,Jacobian,Phi_trial,dPhi_trial,y_est,myrank] = ...
    %                                                       f_lsq(alpha_trial)
    %
    % This function is used by lsqnonlin to compute 
    %          wr_trial   the current weighted residual
    %          Jacobian   the Jacobian matrix for the nonlinear parameters
    % It also computes
    %          Phi_trial  the current Phi matrix
    %          dPhi_trial the partial derivatives of Phi_trial (if available).
    %          y_est      the model estimates of y
    %          myrank     the rank of the matrix W*Phi in the linear problem.
    %
    % It uses the user-supplied function ada and the Varpro function formJacobian.
    %
    % NOTE: this is a NESTED function, so ada comes from the varpro
    % workspace, and the assignments to Ind (below) and to c (in the
    % output list of formJacobian) update the SHARED varpro workspace
    % variables of the same names -- this is how varpro obtains the
    % final c and Ind after lsqnonlin returns.

    [Phi_trial, dPhi_trial, Ind] = feval(ada, alpha_trial);

    [Jacobian, c, wr_trial, y_est,myrank] = ...
      formJacobian(alpha_trial, Phi_trial, dPhi_trial);

  end %--------------------------- End of f_lsq ---------------------------

  %----------------------- Beginning of formJacobian ----------------------

  function [Jacobian, c, wresid, y_est,myrank] = formJacobian(alpha, Phi, dPhi)

    % function [Jacobian, c, wresid, y_est,myrank] =  formJacobian(alpha, Phi, dPhi)
    %
    % This function computes the Jacobian, the linear parameters c,
    % and the residual for the model with nonlinear parameters alpha.
    % It is used by the functions Varpro and f_lsq.
    %
    % Notation: there are m data observations
    %                     n1 basis functions (columns of Phi)
    %                     n linear parameters c
    %                       (n = n1, or n = n1 - 1)
    %                     q nonlinear parameters alpha
    %                     p nonzero columns of partial derivatives of Phi 
    %
    % Input:
    %
    %      alpha  q x 1   the nonlinear parameters,
    %      Phi    m x n1  the basis functions Phi(alpha),
    %      dPhi   m x p   the partial derivatives of Phi
    %
    % The variables W, y, q, m, n1, and n are also used.
    % (This is a nested function: W, y, q, m, n1, n, Ind, options,
    % mydebug and myneglect are all read from the varpro workspace.)
    %
    % Output: 
    %
    %      Jacobian  m x p the Jacobian matrix, with J(i,k) = partial
    %                      derivative of W resid(i) with respect to alpha(k).
    %      c      n x 1 the optimal linear parameters for this choice of alpha.
    %      wresid    m x 1 the weighted residual = W(y - Phi * c)
    %      y_est     m x 1 the model estimates = Phi * c)
    %      myrank    1 x 1 the rank of the matrix W*Phi.


    % First we determine the optimal linear parameters c for
    % the given values of alpha, and the resulting residual.
    %
    % We use the singular value decomposition to solve the linear least
    % squares problem
    %
    %    min_{c} || W resid ||.
    %       resid =  y - Phi * c.
    %
    % If W*Phi has any singular value less than m * eps times its largest
    % singular value, these singular values are treated as zero
    % (see tol below).

    [U,S,V] = svd(W*Phi(:,1:n));

    % Three cases: Usually n > 1, but n = 1 and n = 0 require
    % special handling (one or no linear parameters).

    if (n > 1)
      s = diag(S);
    elseif (n==1)
      s = S;
    else    %  n = 0
      % No linear subproblem: the model is just the extra term Phi,
      % and the Jacobian is simply -W*dPhi scattered by Ind.
      if isempty(Ind)
        Jacobian = [];
      else
        Jacobian = zeros(length(y),length(alpha));
        Jacobian(:,Ind(2,:)) = -W*dPhi;
      end
      c = [];
      y_est = Phi;
      wresid = W * (y - y_est);
      myrank = 1;   % no linear subproblem; rank reported as 1
      return
    end

    tol = m * eps;
    myrank = sum(s > tol*s(1) ); % number of singular values > tol*norm(W*Phi)
    s = s(1:myrank);

    if (myrank < n) & (~strcmp(options.Display,'off'))
      disp(sprintf('Warning from VARPRO:'))
      disp(sprintf('   The linear parameters are currently not well-determined.'))
      disp(sprintf('   The rank of the matrix in the subproblem is %d',myrank))
      disp(sprintf('   which is less than the n=%d linear parameters.',n))
    end

    % If the model has the extra coefficient-free term phi_{n+1},
    % subtract it from y before solving for c.
    yuse = y;
    if (n < n1)
      yuse  = y - Phi(:,n1); % extra function Phi(:,n+1)
    end
    % Truncated-SVD solution of min ||W*(yuse - Phi*c)||.
    temp  = U(:,1:myrank)' * (W*yuse);    
    c = V(:,1:myrank) * (temp./s);
    y_est = Phi(:,1:n) * c;
    wresid = W * (yuse - y_est);
    if (n < n1)
      y_est = y_est + Phi(:,n1);
    end

    % If there are no nonlinear parameters, or no derivative
    % information, there is no Jacobian to form.
    if (q == 0) | (isempty(dPhi))
      Jacobian = [];
      return
    end

    % Second, we compute the Jacobian.
    % There are two pieces, which we call Jac1 and Jac2,
    % with Jacobian = - (Jac1 + Jac2).
    %
    % The formula for Jac1 is (P D(W*Phi) pinv(W*Phi) y,
    %             and Jac2 is ((W*Phi)^+})^T (P D(W*Phi))^T y.
    % where  P           is the projection onto the orthogonal complement
    %                       of the range of W*Phi,
    %        D(W*Phi)    is the m x n x q tensor of derivatives of W*Phi,
    %        pinv(W*Phi) is the pseudo-inverse of W*Phi.
    %  (See Golub&Pereyra (1973) equation (5.4).  We use their notational  
    %   conventions for multiplications by tensors.)
    %
    % Golub&Pereyra (2003), p. R5 break these formulas down by columns:
    %     The j-th column of Jac1 is P D_j pinv(W*Phi) y
    %                             =  P D_j c             
    % and the j-th column of Jac2 is (P D_j pinv(W*Phi))^T y,
    %                             =  (pinv(W*Phi))^T D_j^T P^T y
    %                             =  (pinv(W*Phi))^T D_j^T wresid.
    % where D_j is the m x n matrix of partial derivatives of W*Phi
    %     with respect to alpha(j).

    % We begin the computation by precomputing 
    %       WdPhi, which contains the derivatives of W*Phi, and 
    %       WdPhi_r, which contains WdPhi' * wresid.

    WdPhi = W * dPhi;
    WdPhi_r = WdPhi' * wresid;
    T2 = zeros(n1, q);
    ctemp = c;
    if (n1 > n)
      ctemp = [ctemp; 1];   % unit coefficient for the extra term phi_{n+1}
    end

    %   Now we work column-by-column, for j=1:q.
    %
    %   We form Jac1 = D(W*Phi) ctemp.
    %   After the loop, this matrix is multiplied by 
    %        P = U(:,myrank+1:m)*(U(:,myrank+1:m)'
    %   to complete the computation.
    % 
    %   We also form  T2 = (D_j(W*Phi))^T wresid  by unpacking
    %   the information in WdPhi_r, using Ind.
    %   After the loop, T2 is multiplied by the pseudoinverse
    %       (pinv(W*Phi))^T = U(:,1:myrank) * diag(1./s) * (V(:,1:myrank)'
    %   to complete the computation of Jac2.
    %   Note: if n1 > n, last row of T2 is not needed.   
    %
    %   NOTE(review): Jac1 is grown column-by-column in this loop
    %   rather than preallocated.

    for j=1:q,                            % for each nonlinear parameter alpha(j)
      range = find(Ind(2,:)==j);        % columns of WdPhi relevant to alpha(j)
      indrows = Ind(1,range);           % relevant rows of ctemp
      Jac1(:,j) = WdPhi(:,range) * ctemp(indrows);      
      T2(indrows,j) = WdPhi_r(range);
    end


    Jac1 = U(:,myrank+1:m) * (U(:,myrank+1:m)' * Jac1);

    T2 = diag(1 ./ s(1:myrank)) * (V(:,1:myrank)' * T2(1:n,:));
    Jac2 = U(:,1:myrank) * T2;

    Jacobian = -(Jac1 + Jac2);

    % Debug hook: globals mydebug/myneglect allow experiments that
    % neglect the Jac2 term of the Jacobian.
    if (mydebug)
      disp(sprintf('VARPRO norm(neglected Jacobian)/norm(Jacobian) = %e',...
        norm(Jac2,'fro')/norm(Jacobian,'fro') ))
      if (myneglect)
        disp('neglecting')
        Jacobian = -Jac1;
      end
    end

  end %-------------------------- End of formJacobian ----------------------

end %------------------------------ End of varpro ------------------------