// egobox_gp/sparse_algorithm.rs

use crate::ThetaTuning;
use crate::errors::{GpError, Result};
use crate::optimization::{CobylaParams, optimize_params, prepare_multistart};
use crate::sparse_parameters::{Inducings, ParamTuning, SgpParams, SgpValidParams, SparseMethod};
use crate::{GpSamplingMethod, correlation_models::*, sample, utils::pairwise_differences};
use finitediff::ndarr;
use linfa::prelude::{Dataset, DatasetBase, Fit, Float, PredictInplace};
use linfa_linalg::{cholesky::*, triangular::*};
use linfa_pls::PlsRegression;
use ndarray::{Array, Array1, Array2, ArrayBase, ArrayView2, Axis, Data, Ix1, Ix2, Zip, s};
use ndarray_einsum::*;
use ndarray_rand::rand::SeedableRng;
use ndarray_rand::rand::seq::SliceRandom;
use rand_xoshiro::Xoshiro256Plus;

use log::debug;
use rayon::prelude::*;
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::time::Instant;

/// Woodbury data computed during training and used for prediction
///
/// The name comes from the [Woodbury matrix identity](https://en.wikipedia.org/wiki/Woodbury_matrix_identity)
#[cfg_attr(
    feature = "serializable",
    derive(Serialize, Deserialize),
    serde(bound(serialize = "F: Serialize", deserialize = "F: Deserialize<'de>"))
)]
#[derive(Debug)]
pub(crate) struct WoodburyData<F: Float> {
    vec: Array2<F>,
    inv: Array2<F>,
}
impl<F: Float> Clone for WoodburyData<F> {
    fn clone(&self) -> Self {
        WoodburyData {
            vec: self.vec.clone(),
            inv: self.inv.clone(),
        }
    }
}
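
// At prediction time the Woodbury factors are used as follows (see `predict`
// and `predict_var` below), with `Z` the inducing points:
//   mean(x) = K(x, Z) · vec
//   var(x)  = sigma2 - sum(K(Z, x) * (inv · K(Z, x)), axis=0)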

/// A sparse Gaussian process uses a set of `M` inducing points either to approximate the posterior Gaussian distribution
/// with a low-rank representation (FITC - Fully Independent Training Conditional method), or to approximate the posterior
/// distribution directly (VFE - Variational Free Energy method).
///
/// These methods enable accurate modeling of large training datasets of N points while preserving
/// computational efficiency: with `M < N`, time complexity drops from `O(N^3)` to `O(NM^2)`
/// and memory usage from `O(N^2)` to `O(NM)`.
///
/// See the Reference section for more information.
///
/// # Implementation
///
/// [`SparseGaussianProcess`] inducing points can either be chosen randomly or provided by the user through
/// the [`Inducings`] specification. The sparse method in use is specified with [`SparseMethod`].
/// The noise variance can either be specified as a known constant or estimated (see [`ParamTuning`]).
/// Unlike the [`GaussianProcess`](crate::GaussianProcess) implementation, [`SparseGaussianProcess`]
/// does not allow choosing a trend: the trend is assumed to be zero.
/// The correlation kernel may be selected among the [available kernels](crate::correlation_models).
/// When targeting a squared exponential kernel, one can use the [SparseKriging] shortcut.
///
/// # Features
///
/// ## serializable
///
/// The `serializable` feature enables the serialization of GP models using the [`serde crate`](https://serde.rs/).
///
/// # Example
///
/// ```
/// use ndarray::{Array, Array1, Array2, Axis};
/// use ndarray_rand::rand;
/// use ndarray_rand::rand::SeedableRng;
/// use ndarray_rand::RandomExt;
/// use ndarray_rand::rand_distr::{Normal, Uniform};
/// use linfa::prelude::{Dataset, DatasetBase, Fit, Float, PredictInplace};
///
/// use egobox_gp::{SparseKriging, Inducings};
///
/// const PI: f64 = std::f64::consts::PI;
///
/// // Let us define a hidden target function for our sparse GP example
/// fn f_obj(x: &Array1<f64>) -> Array1<f64> {
///     x.mapv(|v| (3. * PI * v).sin() + 0.3 * (9. * PI * v).cos() + 0.5 * (7. * PI * v).sin())
/// }
///
/// // Then we define a utility function generating `nt` data points
/// // corrupted by Gaussian noise with variance `eta2`.
/// fn make_test_data(
///     nt: usize,
///     eta2: f64,
/// ) -> (Array2<f64>, Array1<f64>) {
///     let normal = Normal::new(0., eta2.sqrt()).unwrap();
///     let mut rng = rand::thread_rng();
///     let gaussian_noise = Array::<f64, _>::random_using((nt, ), normal, &mut rng);
///     let xt = 2. * Array::<f64, _>::random_using((nt, ), Uniform::new(0., 1.), &mut rng) - 1.;
///     let yt = f_obj(&xt) + gaussian_noise;
///     (xt.insert_axis(Axis(1)), yt)
/// }
///
/// // Generate training data
/// let nt = 200;
/// // Variance of the Gaussian noise on our training data
/// let eta2: f64 = 0.01;
/// let (xt, yt) = make_test_data(nt, eta2);
///
/// // Train our sparse Gaussian process with n inducing points taken in the dataset
/// let n_inducings = 30;
/// let sgp = SparseKriging::params(Inducings::Randomized(n_inducings))
///     .fit(&Dataset::new(xt, yt))
///     .expect("SGP fitted");
///
/// println!("sgp theta={:?}", sgp.theta());
/// println!("sgp variance={:?}", sgp.variance());
/// println!("noise variance={:?}", sgp.noise_variance());
///
/// // Predict with our trained SGP
/// let xplot = Array::linspace(-1., 1., 100).insert_axis(Axis(1));
/// let sgp_vals = sgp.predict(&xplot).unwrap();
/// let sgp_vars = sgp.predict_var(&xplot).unwrap();
/// ```
///
/// # Reference
///
/// Valayer, H.; Bartoli, N.; Castaño-Aguirre, M.; Lafage, R.; Lefebvre, T.; López-Lopera, A.F.; Mouton, S.
/// [A Python Toolbox for Data-Driven Aerodynamic Modeling Using Sparse Gaussian Processes](https://doi.org/10.3390/aerospace11040260).
/// Aerospace 2024, 11, 260.
///
/// Matthias Bauer, Mark van der Wilk, and Carl Edward Rasmussen.
/// [Understanding Probabilistic Sparse Gaussian Process Approximations](https://arxiv.org/pdf/1606.04820.pdf).
/// In: Advances in Neural Information Processing Systems. Ed. by D. Lee et al. Vol. 29. Curran Associates, Inc., 2016.
///
#[derive(Debug)]
#[cfg_attr(
    feature = "serializable",
    derive(Serialize, Deserialize),
    serde(bound(
        serialize = "F: Serialize, Corr: Serialize",
        deserialize = "F: Deserialize<'de>, Corr: Deserialize<'de>"
    ))
)]
pub struct SparseGaussianProcess<F: Float, Corr: CorrelationModel<F>> {
    /// Correlation kernel
    corr: Corr,
    /// Sparse method used
    method: SparseMethod,
    /// Parameters of the autocorrelation model
    theta: Array1<F>,
    /// Estimated Gaussian process variance
    sigma2: F,
    /// Gaussian noise variance
    noise: F,
    /// Reduced likelihood value (result of the internal optimization)
    /// May be used to compare different trained models
    likelihood: F,
    /// Weights in case of KPLS dimension reduction coming from PLS regression (orig_dim, kpls_dim)
    w_star: Array2<F>,
    /// Inducing points
    inducings: Array2<F>,
    /// Data used for prediction
    w_data: WoodburyData<F>,
    /// Training data (input, output)
    pub(crate) training_data: (Array2<F>, Array1<F>),
    /// Parameters used to fit this model
    pub(crate) params: SgpValidParams<F, Corr>,
}

/// Kriging as a sparse GP special case when using the squared exponential correlation model
pub type SparseKriging<F> = SgpParams<F, SquaredExponentialCorr>;

impl<F: Float> SparseKriging<F> {
    /// Constructor for SparseKriging parameters
    pub fn params(inducings: Inducings<F>) -> SgpParams<F, SquaredExponentialCorr> {
        SgpParams::new(SquaredExponentialCorr(), inducings)
    }
}

impl<F: Float, Corr: CorrelationModel<F>> Clone for SparseGaussianProcess<F, Corr> {
    fn clone(&self) -> Self {
        Self {
            corr: self.corr,
            method: self.method,
            theta: self.theta.to_owned(),
            sigma2: self.sigma2,
            noise: self.noise,
            likelihood: self.likelihood,
            w_star: self.w_star.to_owned(),
            inducings: self.inducings.clone(),
            w_data: self.w_data.clone(),
            training_data: self.training_data.clone(),
            params: self.params.clone(),
        }
    }
}

impl<F: Float, Corr: CorrelationModel<F>> fmt::Display for SparseGaussianProcess<F, Corr> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "SGP(corr={}, theta={}, variance={}, noise variance={}, likelihood={})",
            self.corr, self.theta, self.sigma2, self.noise, self.likelihood
        )
    }
}

impl<F: Float, Corr: CorrelationModel<F>> SparseGaussianProcess<F, Corr> {
    /// SGP parameters constructor
    pub fn params<NewCorr: CorrelationModel<F>>(
        corr: NewCorr,
        inducings: Inducings<F>,
    ) -> SgpParams<F, NewCorr> {
        SgpParams::new(corr, inducings)
    }

    fn compute_k(
        &self,
        a: &ArrayBase<impl Data<Elem = F>, Ix2>,
        b: &ArrayBase<impl Data<Elem = F>, Ix2>,
        w_star: &Array2<F>,
        theta: &Array1<F>,
        sigma2: F,
    ) -> Array2<F> {
        // Get pairwise componentwise L1-distances to the input training set
        let dx = pairwise_differences(a, b);
        // Compute the correlation function
        let r = self.corr.rval_from_distances(&dx, theta, w_star);
        r.into_shape_with_order((a.nrows(), b.nrows()))
            .unwrap()
            .mapv(|v| v * sigma2)
    }

    /// Predict output values at n given `x` points of nx components specified as a (n, nx) matrix.
    /// Returns n scalar output values as a vector (n,).
    pub fn predict(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Result<Array1<F>> {
        let kx = self.compute_k(x, &self.inducings, &self.w_star, &self.theta, self.sigma2);
        let mu = kx.dot(&self.w_data.vec).remove_axis(Axis(1));
        Ok(mu)
    }

    /// Predict variance values at n given `x` points of nx components specified as a (n, nx) matrix.
    /// Returns n variance values as a vector (n,).
    pub fn predict_var(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Result<Array1<F>> {
        let kx = self.compute_k(&self.inducings, x, &self.w_star, &self.theta, self.sigma2);
        let kxx = Array::from_elem(x.nrows(), self.sigma2);
        let var = kxx - (self.w_data.inv.t().dot(&kx) * &kx).sum_axis(Axis(0));
        // Clamp variances to a small positive value and add the noise variance
        let var = var.mapv(|v| {
            if v < F::cast(1e-15) {
                F::cast(1e-15) + self.noise
            } else {
                v + self.noise
            }
        });
        Ok(var)
    }

    /// Optimal theta
    pub fn theta(&self) -> &Array1<F> {
        &self.theta
    }

    /// Estimated variance
    pub fn variance(&self) -> F {
        self.sigma2
    }

    /// Estimated noise variance
    pub fn noise_variance(&self) -> F {
        self.noise
    }

    /// Retrieve reduced likelihood value
    pub fn likelihood(&self) -> F {
        self.likelihood
    }

    /// Inducing points
    pub fn inducings(&self) -> &Array2<F> {
        &self.inducings
    }

    /// Retrieve the number of PLS components (`1 <= n <= x` dimension),
    /// or `None` when no dimension reduction is applied
    pub fn kpls_dim(&self) -> Option<usize> {
        if self.w_star.ncols() < self.training_data.0.ncols() {
            Some(self.w_star.ncols())
        } else {
            None
        }
    }

    /// Retrieve input and output dimensions
    pub fn dims(&self) -> (usize, usize) {
        (self.training_data.0.ncols(), self.training_data.1.len())
    }

    /// Predict gradients of the mean function at n given `x` points of nx components specified as a (n, nx) matrix.
    /// Returns n gradient vectors as a (n, nx) matrix.
    pub fn predict_gradients(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
        let mut drv = Array2::<F>::zeros((x.nrows(), self.training_data.0.ncols()));
        // Gradients are approximated by central finite differences on the mean prediction.
        // Note: the raw pointer casts below reinterpret `F` values as `f64`
        // and assume `F` is effectively `f64` at runtime.
        let f = |x: &Array1<f64>| -> std::result::Result<f64, anyhow::Error> {
            let x = x.to_owned().insert_axis(Axis(0)).mapv(|v| F::cast(v));
            let v = self.predict(&x).unwrap()[0];
            Ok(unsafe { *(&v as *const F as *const f64) })
        };
        Zip::from(drv.rows_mut())
            .and(x.rows())
            .for_each(|mut row, xi| {
                let xi = xi.mapv(|v| unsafe { *(&v as *const F as *const f64) });
                let g_central = ndarr::central_diff(&f);
                let grad = g_central(&xi).unwrap().mapv(|v| F::cast(v));
                row.assign(&grad);
            });
        drv
    }

    /// Predict gradients of the variance function at n given `x` points of nx components specified as a (n, nx) matrix.
    /// Returns n gradient vectors as a (n, nx) matrix.
    pub fn predict_var_gradients(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
        let mut drv = Array2::<F>::zeros((x.nrows(), self.training_data.0.ncols()));
        // Same central finite-difference scheme as `predict_gradients`, applied to the variance
        let f = |x: &Array1<f64>| -> std::result::Result<f64, anyhow::Error> {
            let x = x.to_owned().insert_axis(Axis(0)).mapv(|v| F::cast(v));
            let v = self.predict_var(&x).unwrap()[0];
            Ok(unsafe { *(&v as *const F as *const f64) })
        };
        Zip::from(drv.rows_mut())
            .and(x.rows())
            .for_each(|mut row, xi| {
                let xi = xi.mapv(|v| unsafe { *(&v as *const F as *const f64) });
                let grad = ndarr::central_diff(&f)(&xi).unwrap().mapv(|v| F::cast(v));
                row.assign(&grad);
            });
        drv
    }

    /// Sample the Gaussian process for `n_traj` trajectories using Cholesky decomposition
    pub fn sample_chol(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>, n_traj: usize) -> Array2<F> {
        self._sample(x, n_traj, GpSamplingMethod::Cholesky)
    }

    /// Sample the Gaussian process for `n_traj` trajectories using eigenvalues decomposition
    pub fn sample_eig(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>, n_traj: usize) -> Array2<F> {
        self._sample(x, n_traj, GpSamplingMethod::EigenValues)
    }

    /// Sample the Gaussian process for `n_traj` trajectories using eigenvalues decomposition (alias of `sample_eig`)
    pub fn sample(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>, n_traj: usize) -> Array2<F> {
        self.sample_eig(x, n_traj)
    }

    fn _sample(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
        n_traj: usize,
        method: GpSamplingMethod,
    ) -> Array2<F> {
        let mean = self.predict(x).unwrap().insert_axis(Axis(1));
        let cov = self.compute_k(x, x, &self.w_star, &self.theta, self.sigma2);
        sample(x, mean, cov, n_traj, method)
    }
}

impl<F, D, Corr> PredictInplace<ArrayBase<D, Ix2>, Array1<F>> for SparseGaussianProcess<F, Corr>
where
    F: Float,
    D: Data<Elem = F>,
    Corr: CorrelationModel<F>,
{
    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array1<F>) {
        assert_eq!(
            x.nrows(),
            y.len(),
            "The number of data points must match the number of output targets."
        );

        let values = self.predict(x).expect("GP Prediction");
        *y = values;
    }

    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array1<F> {
        Array1::zeros((x.nrows(),))
    }
}

/// Sparse Gaussian process adaptor to implement the `linfa::Predict` trait for variance prediction.
#[allow(dead_code)]
pub struct SparseGpVariancePredictor<'a, F, Corr>(&'a SparseGaussianProcess<F, Corr>)
where
    F: Float,
    Corr: CorrelationModel<F>;

impl<F, D, Corr> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
    for SparseGpVariancePredictor<'_, F, Corr>
where
    F: Float,
    D: Data<Elem = F>,
    Corr: CorrelationModel<F>,
{
    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array1<F>) {
        assert_eq!(
            x.nrows(),
            y.len(),
            "The number of data points must match the number of output targets."
        );

        let values = self.0.predict_var(x).expect("GP Prediction");
        *y = values;
    }

    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array1<F> {
        Array1::zeros(x.nrows())
    }
}

impl<F: Float, Corr: CorrelationModel<F>, D: Data<Elem = F> + Sync>
    Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>, GpError> for SgpValidParams<F, Corr>
{
    type Object = SparseGaussianProcess<F, Corr>;

    /// Fit SGP parameters using maximum likelihood
    fn fit(
        &self,
        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>>,
    ) -> Result<Self::Object> {
        let x = dataset.records();
        let y = dataset.targets().to_owned().insert_axis(Axis(1));

        if let Some(d) = self.kpls_dim()
            && *d > x.ncols()
        {
            return Err(GpError::InvalidValueError(format!(
                "Dimension reduction {} should be smaller than actual \
                    training input dimensions {}",
                d,
                x.ncols()
            )));
        }

        let xtrain = x;
        let ytrain = y;

        let mut w_star = Array2::eye(x.ncols());
        if let Some(n_components) = self.kpls_dim() {
            let ds = Dataset::new(xtrain.to_owned(), ytrain.to_owned());
            w_star = PlsRegression::params(*n_components).fit(&ds).map_or_else(
                |e| match e {
                    linfa_pls::PlsError::PowerMethodConstantResidualError() => {
                        Ok(Array2::zeros((x.ncols(), *n_components)))
                    }
                    err => Err(err),
                },
                |v| Ok(v.rotations().0.to_owned()),
            )?;
        }

        let mut rng = match self.seed() {
            Some(seed) => Xoshiro256Plus::seed_from_u64(*seed),
            None => Xoshiro256Plus::from_entropy(),
        };
        let z = match self.inducings() {
            Inducings::Randomized(n) => make_inducings(*n, &xtrain.view(), &mut rng),
            Inducings::Located(z) => z.to_owned(),
        };

        // Initial guess for the noise; when the noise variance is fixed,
        // it is not part of the optimization parameters
        let (is_noise_estimated, noise0) = match self.noise_variance() {
            ParamTuning::Fixed(c) => (false, c),
            ParamTuning::Optimized { init: c, bounds: _ } => (true, c),
        };

        let (init, bounds) = match self.theta_tuning() {
            ThetaTuning::Full { init, bounds } => (init.clone(), bounds.clone()),
            ThetaTuning::Partial {
                init,
                active: _,
                bounds,
            } => {
                log::warn!(
                    "Partial hyperparameter optimization not implemented in SparseGp, full optimization used"
                );
                (init.clone(), bounds.clone())
            }
            ThetaTuning::Fixed(init) => (init.clone(), init.iter().map(|v| (*v, *v)).collect()),
        };
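        // E.g. `ThetaTuning::Fixed` yields degenerate bounds: init = [0.1, 0.2]
        // gives bounds = [(0.1, 0.1), (0.2, 0.2)], so the optimizer keeps theta unchanged.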

        // Initial guess for theta
        let theta0_dim = init.len();
        let theta0 = if theta0_dim == 1 {
            Array1::from_elem(w_star.ncols(), self.theta_tuning().init()[0])
        } else if theta0_dim == w_star.ncols() {
            Array::from_vec(self.theta_tuning().init().to_vec())
        } else {
            panic!(
                "Initial guess for theta should be either 1-dim or dim of xtrain (w_star.ncols()), got {theta0_dim}"
            )
        };

        // Initial guess for variance
        let y_std = ytrain.std_axis(Axis(0), F::one());
        let sigma2_0 = y_std[0] * y_std[0];

        // Params consist of [theta1, ..., thetap, sigma2, [noise]]
        // where sigma2 is the variance of the Gaussian process
        // and noise is the noise variance when it is estimated
        let n = theta0.len() + 1 + is_noise_estimated as usize;
        let mut params_0 = Array1::zeros(n);
        params_0
            .slice_mut(s![..n - 1 - is_noise_estimated as usize])
            .assign(&theta0);
        params_0[n - 1 - is_noise_estimated as usize] = sigma2_0;
        if is_noise_estimated {
            // noise variance is estimated, noise0 is the initial guess
            params_0[n - 1] = *noise0;
        }
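        // Example layout with a 2-dim theta and an estimated noise variance:
        //   params_0 = [theta_1, theta_2, sigma2_0, noise0]   (n = 4)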

        // We optimize the change of variable log10(theta), as theta acts
        // as an inverse lengthscale in the objective function
        let base: f64 = 10.;
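        // e.g. an optimizer value of -2.0 corresponds to theta = 10^(-2.0) = 0.01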
        let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 {
            for v in x.iter() {
                // check params as the optimizer may yield NaN values
                if v.is_nan() {
                    // short-circuit: return the worst value w.r.t. reduced likelihood minimization
                    return f64::INFINITY;
                }
            }
            let input = Array1::from_shape_vec(
                (x.len(),),
                x.iter().map(|v| F::cast(base.powf(*v))).collect(),
            )
            .unwrap();

            let theta = input.slice(s![..input.len() - 1 - is_noise_estimated as usize]);
            let sigma2 = input[input.len() - 1 - is_noise_estimated as usize];
            let noise = if is_noise_estimated {
                input[input.len() - 1]
            } else {
                F::cast(*noise0)
            };

            let theta = theta.mapv(F::cast);
            match self.reduced_likelihood(
                &theta,
                sigma2,
                noise,
                &w_star,
                &xtrain.view(),
                &ytrain.view(),
                &z,
                self.nugget(),
            ) {
                Ok(r) => unsafe { -(*(&r.0 as *const F as *const f64)) },
                Err(_) => f64::INFINITY,
            }
        };

        // Bounds of theta, variance and optionally noise variance
        let bounds_dim = bounds.len();
        let bounds = if bounds_dim == 1 {
            vec![bounds[0]; params_0.len()]
        } else if bounds_dim == params_0.len() {
            bounds.to_vec()
        } else {
            panic!(
                "Bounds should be either 1-dim or match the number of optimized params ({}), got {}",
                params_0.len(),
                bounds_dim
            )
        };

        let (theta_inits, mut bounds) = prepare_multistart(self.n_start(), &params_0, &bounds);

        // variance bounds
        bounds[theta_inits.ncols() - 1 - is_noise_estimated as usize] =
            (F::cast(1e-12).log10(), (F::cast(9.) * sigma2_0).log10());
        // optionally adjust noise variance bounds
        if let ParamTuning::Optimized {
            init: _,
            bounds: (lo, up),
        } = self.noise_variance()
        {
            // Set bounds for noise
            if let Some(noise_bounds) = bounds.last_mut() {
                *noise_bounds = (lo.log10(), up.log10());
            }
        }
        debug!("Optimize with multistart theta = {theta_inits:?} and bounds = {bounds:?}");
        let now = Instant::now();
        let opt_params = (0..theta_inits.nrows())
            .into_par_iter()
            .map(|i| {
                optimize_params(
                    objfn,
                    &theta_inits.row(i).to_owned(),
                    &bounds,
                    CobylaParams {
                        maxeval: (10 * theta0_dim)
                            .clamp(crate::GP_COBYLA_MIN_EVAL, self.max_eval()),
                        ..CobylaParams::default()
                    },
                )
            })
            .reduce(
                || (f64::INFINITY, Array::ones((theta_inits.ncols(),))),
                |a, b| if b.0 < a.0 { b } else { a },
            );
        debug!("elapsed optim = {:?}", now.elapsed().as_millis());
        let opt_params = opt_params.1.mapv(|v| F::cast(base.powf(v)));

        let opt_theta = opt_params
            .slice(s![..n - 1 - is_noise_estimated as usize])
            .to_owned();
        let opt_sigma2 = opt_params[n - 1 - is_noise_estimated as usize];
        let opt_noise = if is_noise_estimated {
            opt_params[n - 1]
        } else {
            *noise0
        };

        // Recompute reduced likelihood with optimized params
        let (lkh, w_data) = self.reduced_likelihood(
            &opt_theta,
            opt_sigma2,
            opt_noise,
            &w_star,
            &xtrain.view(),
            &ytrain.view(),
            &z,
            self.nugget(),
        )?;
        Ok(SparseGaussianProcess {
            corr: *self.corr(),
            method: self.method(),
            theta: opt_theta,
            sigma2: opt_sigma2,
            noise: opt_noise,
            likelihood: lkh,
            w_data,
            w_star,
            inducings: z.clone(),
            training_data: (xtrain.to_owned(), ytrain.remove_axis(Axis(1))),
            params: self.clone(),
        })
    }
}

impl<F: Float, Corr: CorrelationModel<F>> SgpValidParams<F, Corr> {
    /// Compute the reduced likelihood function
    /// nugget: factor to improve numerical stability
    #[allow(clippy::too_many_arguments)]
    fn reduced_likelihood(
        &self,
        theta: &Array1<F>,
        sigma2: F,
        noise: F,
        w_star: &Array2<F>,
        xtrain: &ArrayView2<F>,
        ytrain: &ArrayView2<F>,
        z: &Array2<F>,
        nugget: F,
    ) -> Result<(F, WoodburyData<F>)> {
        let (likelihood, w_data) = match self.method() {
            SparseMethod::Fitc => {
                self.fitc(theta, sigma2, noise, w_star, xtrain, ytrain, z, nugget)
            }
            SparseMethod::Vfe => self.vfe(theta, sigma2, noise, w_star, xtrain, ytrain, z, nugget),
        };

        Ok((likelihood, w_data))
    }

    /// Compute covariance matrix between a and b matrices
    fn compute_k(
        &self,
        a: &ArrayBase<impl Data<Elem = F>, Ix2>,
        b: &ArrayBase<impl Data<Elem = F>, Ix2>,
        w_star: &Array2<F>,
        theta: &Array1<F>,
        sigma2: F,
    ) -> Array2<F> {
        // Get pairwise componentwise L1-distances to the input training set
        let dx = pairwise_differences(a, b);
        // Compute the correlation function
        let r = self.corr().rval_from_distances(&dx, theta, w_star);
        r.into_shape_with_order((a.nrows(), b.nrows()))
            .unwrap()
            .mapv(|v| v * sigma2)
    }

    /// FITC method
    #[allow(clippy::too_many_arguments)]
    fn fitc(
        &self,
        theta: &Array1<F>,
        sigma2: F,
        noise: F,
        w_star: &Array2<F>,
        xtrain: &ArrayView2<F>,
        ytrain: &ArrayView2<F>,
        z: &Array2<F>,
        nugget: F,
    ) -> (F, WoodburyData<F>) {
        let nz = z.nrows();
        let knn = Array1::from_elem(xtrain.nrows(), sigma2);
        let kmm = self.compute_k(z, z, w_star, theta, sigma2) + Array::eye(nz) * nugget;
        let kmn = self.compute_k(z, xtrain, w_star, theta, sigma2);

        // Compute (lower) Cholesky decomposition: Kmm = U U^T
        let u = kmm.cholesky().unwrap();

        // Compute Cholesky decomposition: Qnn = V^T V
        let ui = u
            .solve_triangular(&Array::eye(u.nrows()), UPLO::Lower)
            .unwrap();
        let v = ui.dot(&kmn);

        // Assumption on the Gaussian noise on training outputs
        let eta2 = noise;

        // Compute diagonal correction: nu = Knn_diag - Qnn_diag + \eta^2
        let nu = knn;
        let nu = nu - (v.to_owned() * &v).sum_axis(Axis(0));
        let nu = nu + eta2;
        // Compute beta, the effective noise precision
        let beta = nu.mapv(|v| F::one() / v);

        // Compute (lower) Cholesky decomposition: A = I + V diag(beta) V^T = L L^T
        let a = Array::eye(nz) + &(v.to_owned() * beta.to_owned().insert_axis(Axis(0))).dot(&v.t());

        let l = a.cholesky().unwrap();
        let li = l
            .solve_triangular(&Array::eye(l.nrows()), UPLO::Lower)
            .unwrap();

        // Compute a and b
        let a = einsum("ij,i->ij", &[ytrain, &beta])
            .unwrap()
            .into_dimensionality::<Ix2>()
            .unwrap();
        let tmp = li.dot(&v);
        let b = tmp.dot(&a);

        // Compute marginal log-likelihood
        // constant term ignored in reduced likelihood
        //let term0 = self.training_data.1.nrows() * F::cast(2. * std::f64::consts::PI);
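        // Reduced log-likelihood assembled below (constant term dropped):
        //   -0.5 * ( sum(ln nu_i) + 2*sum(ln L_ii) + y^T diag(beta) y - ||b||^2 )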
        let term1 = nu.mapv(|v| v.ln()).sum();
        let term2 = F::cast(2.) * l.diag().mapv(|v| v.ln()).sum();
        let term3 = (a.t().to_owned()).dot(ytrain)[[0, 0]];
        //let term4 = einsum("ij,ij->", &[&b, &b]).unwrap();
        let term4 = -(b.to_owned() * &b).sum();
        let likelihood = -F::cast(0.5) * (term1 + term2 + term3 + term4);

        // Store Woodbury vectors for prediction step
        let li_ui = li.dot(&ui);
        let li_ui_t = li_ui.t();
        let w_data = WoodburyData {
            vec: li_ui_t.dot(&b),
            inv: (ui.t()).dot(&ui) - li_ui_t.dot(&li_ui),
        };

        (likelihood, w_data)
    }

    /// VFE method
    #[allow(clippy::too_many_arguments)]
    fn vfe(
        &self,
        theta: &Array1<F>,
        sigma2: F,
        noise: F,
        w_star: &Array2<F>,
        xtrain: &ArrayView2<F>,
        ytrain: &ArrayView2<F>,
        z: &Array2<F>,
        nugget: F,
    ) -> (F, WoodburyData<F>) {
        // Compute Kmm and Kmn
        let nz = z.nrows();
        let kmm = self.compute_k(z, z, w_star, theta, sigma2) + Array::eye(nz) * nugget;
        let kmn = self.compute_k(z, xtrain, w_star, theta, sigma2);

        // Compute Cholesky decomposition: Kmm = U U^T
        let u = kmm.cholesky().unwrap();

        // Compute Cholesky decomposition: Qnn = V^T V
        let ui = u
            .solve_triangular(&Array::eye(u.nrows()), UPLO::Lower)
            .unwrap();
        let v = ui.dot(&kmn);

        // Compute beta, the effective noise precision
        let beta = F::one() / noise.max(nugget);

        // Compute A = beta * V @ V.T
        let a = v.to_owned().dot(&v.t()).mapv(|v| v * beta);

        // Compute Cholesky decomposition: B = I + A = L L^T
        let b: Array2<F> = Array::eye(nz) + &a;
        let l = b.cholesky().unwrap();
        let li = l
            .solve_triangular(&Array::eye(l.nrows()), UPLO::Lower)
            .unwrap();

        // Compute b
        let b = li.dot(&v).dot(ytrain).mapv(|v| v * beta);

        // Compute log-marginal likelihood
        // constant term ignored in reduced likelihood
        //let term0 = self.training_data.1.nrows() * (F::cast(2. * std::f64::consts::PI)
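        // Reduced log-likelihood assembled below (constant term dropped), where
        // beta*trace(Knn) = n*beta*sigma2 since each diagonal entry of Knn is sigma2:
        //   -0.5 * ( -n*ln(beta) + 2*sum(ln L_ii) + beta*y^T y - ||b||^2
        //            + beta*trace(Knn) - trace(A) )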
        let term1 = -F::cast(ytrain.nrows()) * beta.ln();
        let term2 = F::cast(2.) * l.diag().mapv(|v| v.ln()).sum();
        let term3 = beta * (ytrain.to_owned() * ytrain).sum();
        let term4 = -b.t().dot(&b)[[0, 0]];
        let term5 = F::cast(ytrain.nrows()) * beta * sigma2;
        let term6 = -a.diag().sum();

        let likelihood = -F::cast(0.5) * (term1 + term2 + term3 + term4 + term5 + term6);

        let li_ui = li.dot(&ui);
        let bi = Array::eye(nz) + li.t().dot(&li);
        let w_data = WoodburyData {
            vec: li_ui.t().dot(&b),
            inv: ui.t().dot(&bi).dot(&ui),
        };

        (likelihood, w_data)
    }
}

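/// Pick `n_inducing` distinct rows of `xt` at random (without replacement)
/// to be used as inducing points.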
fn make_inducings<F: Float>(
    n_inducing: usize,
    xt: &ArrayView2<F>,
    rng: &mut Xoshiro256Plus,
) -> Array2<F> {
    let mut indices = (0..xt.nrows()).collect::<Vec<_>>();
    indices.shuffle(rng);
    let n = n_inducing.min(xt.nrows());
    let mut z = Array2::zeros((n, xt.ncols()));
    let idx = indices[..n].to_vec();
    Zip::from(z.rows_mut())
        .and(&Array1::from_vec(idx))
        .for_each(|mut zi, i| zi.assign(&xt.row(*i)));
    z
}

#[cfg(test)]
mod tests {
    use super::*;

    use approx::assert_abs_diff_eq;
    use ndarray::{Array, array, concatenate};
    // use ndarray_npy::{read_npy, write_npy};
    use ndarray_npy::write_npy;
    use ndarray_rand::RandomExt;
    use ndarray_rand::rand::SeedableRng;
    use ndarray_rand::rand_distr::{Normal, Uniform};
    use rand_xoshiro::Xoshiro256Plus;

    const PI: f64 = std::f64::consts::PI;

    fn f_obj(x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Array2<f64> {
        x.mapv(|v| (3. * PI * v).sin() + 0.3 * (9. * PI * v).cos() + 0.5 * (7. * PI * v).sin())
    }

    fn make_test_data(
        nt: usize,
        eta2: f64,
        rng: &mut Xoshiro256Plus,
    ) -> (Array2<f64>, Array1<f64>) {
        let normal = Normal::new(0., eta2.sqrt()).unwrap();
        let gaussian_noise = Array::<f64, _>::random_using((nt, 1), normal, rng);
        let xt = 2. * Array::<f64, _>::random_using((nt, 1), Uniform::new(0., 1.), rng) - 1.;
        let yt = f_obj(&xt) + gaussian_noise;
        (xt, yt.remove_axis(Axis(1)))
    }

    fn save_data(
        xt: &Array2<f64>,
        yt: &Array2<f64>,
        z: &Array2<f64>,
        xplot: &Array2<f64>,
        sgp_vals: &Array2<f64>,
        sgp_vars: &Array2<f64>,
    ) {
        let test_dir = "target/tests";
        std::fs::create_dir_all(test_dir).ok();

        let file_path = format!("{test_dir}/sgp_xt.npy");
        write_npy(file_path, xt).expect("xt saved");
        let file_path = format!("{test_dir}/sgp_yt.npy");
        write_npy(file_path, yt).expect("yt saved");
        let file_path = format!("{test_dir}/sgp_z.npy");
        write_npy(file_path, z).expect("z saved");
        let file_path = format!("{test_dir}/sgp_x.npy");
        write_npy(file_path, xplot).expect("x saved");
        let file_path = format!("{test_dir}/sgp_vals.npy");
        write_npy(file_path, sgp_vals).expect("sgp vals saved");
        let file_path = format!("{test_dir}/sgp_vars.npy");
        write_npy(file_path, sgp_vars).expect("sgp vars saved");
    }

    #[test]
    fn test_sgp_default() {
        let mut rng = Xoshiro256Plus::seed_from_u64(42);
        // Generate training data
        let nt = 200;
        // Variance of the Gaussian noise on our training data
        let eta2: f64 = 0.01;
        let (xt, yt) = make_test_data(nt, eta2, &mut rng);
        // let test_dir = "target/tests";
        // let file_path = format!("{}/smt_xt.npy", test_dir);
        // let xt: Array2<f64> = read_npy(file_path).expect("xt read");
        // let file_path = format!("{}/smt_yt.npy", test_dir);
        // let yt: Array2<f64> = read_npy(file_path).expect("yt read");

        let xplot = Array::linspace(-1.0, 1.0, 100).insert_axis(Axis(1));
        let n_inducings = 30;

        // let file_path = format!("{}/smt_z.npy", test_dir);
        // let z: Array2<f64> = read_npy(file_path).expect("z read");
        // println!("{:?}", z);

        let sgp = SparseKriging::params(Inducings::Randomized(n_inducings))
            //let sgp = SparseKriging::params(Inducings::Located(z))
            //.noise_variance(ParamEstimation::Constant(0.01))
            .seed(Some(42))
            .fit(&Dataset::new(xt.clone(), yt.clone()))
            .expect("GP fitted");

        println!("theta={:?}", sgp.theta());
        println!("variance={:?}", sgp.variance());
        println!("noise variance={:?}", sgp.noise_variance());
        // assert_abs_diff_eq!(eta2, sgp.noise_variance());

        let sgp_vals = sgp.predict(&xplot).unwrap().insert_axis(Axis(1));
        let yplot = f_obj(&xplot);
        let errvals = (yplot - &sgp_vals).mapv(|v| v.abs());
        assert_abs_diff_eq!(errvals, Array2::zeros((xplot.nrows(), 1)), epsilon = 0.5);
        let sgp_vars = sgp.predict_var(&xplot).unwrap().insert_axis(Axis(1));
        let errvars = (&sgp_vars - Array2::from_elem((xplot.nrows(), 1), 0.01)).mapv(|v| v.abs());
        assert_abs_diff_eq!(errvars, Array2::zeros((xplot.nrows(), 1)), epsilon = 0.3);

        save_data(
            &xt,
            &yt.insert_axis(Axis(1)),
            sgp.inducings(),
            &xplot,
            &sgp_vals,
            &sgp_vars,
        );
    }

    #[test]
    fn test_sgp_vfe() {
        let mut rng = Xoshiro256Plus::seed_from_u64(42);
        // Generate training data
        let nt = 200;
        // Variance of the Gaussian noise on our training data
        let eta2: f64 = 0.01;
        let (xt, yt) = make_test_data(nt, eta2, &mut rng);
        // let test_dir = "target/tests";
        // let file_path = format!("{}/smt_xt.npy", test_dir);
        // let xt: Array2<f64> = read_npy(file_path).expect("xt read");
        // let file_path = format!("{}/smt_yt.npy", test_dir);
        // let yt: Array2<f64> = read_npy(file_path).expect("yt read");

        let xplot = Array::linspace(-1., 1., 100).insert_axis(Axis(1));
        let n_inducings = 30;

        let z = make_inducings(n_inducings, &xt.view(), &mut rng);
        // let file_path = format!("{}/smt_z.npy", test_dir);
        // let z: Array2<f64> = read_npy(file_path).expect("z read");

        let sgp = SparseGaussianProcess::<f64, SquaredExponentialCorr>::params(
            SquaredExponentialCorr::default(),
            Inducings::Located(z),
        )
        .sparse_method(SparseMethod::Vfe)
        .noise_variance(ParamTuning::Fixed(0.01))
        .fit(&Dataset::new(xt.clone(), yt.clone()))
        .expect("GP fitted");

        println!("theta={:?}", sgp.theta());
        println!("variance={:?}", sgp.variance());
        println!("noise variance={:?}", sgp.noise_variance());
        assert_abs_diff_eq!(eta2, sgp.noise_variance());

        let sgp_vals = sgp.predict(&xplot).unwrap().insert_axis(Axis(1));
        let sgp_vars = sgp.predict_var(&xplot).unwrap().insert_axis(Axis(1));

        save_data(
            &xt,
            &yt.insert_axis(Axis(1)),
            sgp.inducings(),
            &xplot,
            &sgp_vals,
            &sgp_vars,
        );
    }

    #[test]
    fn test_sgp_noise_estimation() {
        let mut rng = Xoshiro256Plus::seed_from_u64(0);
        // Generate training data
        let nt = 200;
        // Variance of the Gaussian noise on our training data
        let eta2: f64 = 0.01;
        let (xt, yt) = make_test_data(nt, eta2, &mut rng);
        // let test_dir = "target/tests";
        // let file_path = format!("{}/smt_xt.npy", test_dir);
        // let xt: Array2<f64> = read_npy(file_path).expect("xt read");
        // let file_path = format!("{}/smt_yt.npy", test_dir);
        // let yt: Array2<f64> = read_npy(file_path).expect("yt read");

        let xplot = Array::linspace(-1., 1., 100).insert_axis(Axis(1));
        let n_inducings = 30;

        let z = make_inducings(n_inducings, &xt.view(), &mut rng);
        // let file_path = format!("{}/smt_z.npy", test_dir);
        // let z: Array2<f64> = read_npy(file_path).expect("z read");

        let sgp = SparseGaussianProcess::<f64, SquaredExponentialCorr>::params(
            SquaredExponentialCorr::default(),
            Inducings::Located(z.clone()),
        )
        .sparse_method(SparseMethod::Vfe)
        //.sparse_method(SparseMethod::Fitc)
        .theta_init(array![0.1])
        .noise_variance(ParamTuning::Optimized {
            init: 0.02,
            bounds: (1e-3, 1.),
        })
        .fit(&Dataset::new(xt.clone(), yt.clone()))
        .expect("SGP fitted");

        println!("theta={:?}", sgp.theta());
        println!("variance={:?}", sgp.variance());
        println!("noise variance={:?}", sgp.noise_variance());
        #[cfg(not(feature = "nlopt"))]
        assert_abs_diff_eq!(eta2, sgp.noise_variance(), epsilon = 0.015);
        assert_abs_diff_eq!(&z, sgp.inducings(), epsilon = 0.0015);

        let sgp_vals = sgp.predict(&xplot).unwrap().insert_axis(Axis(1));
        let sgp_vars = sgp.predict_var(&xplot).unwrap().insert_axis(Axis(1));

        save_data(
            &xt,
            &yt.insert_axis(Axis(1)),
            &z,
            &xplot,
            &sgp_vals,
            &sgp_vars,
        );
    }

    #[test]
    #[should_panic]
    fn test_multiple_outputs() {
        let mut rng = Xoshiro256Plus::seed_from_u64(42);
        // Generate training data
        let nt = 200;
        // Variance of the Gaussian noise on our training data
        let eta2: f64 = 0.01;
        let (xt, yt) = make_test_data(nt, eta2, &mut rng);
        let yt = concatenate(Axis(1), &[yt.view(), yt.view()]).unwrap();
        let n_inducings = 30;

        let _sgp = SparseKriging::params(Inducings::Randomized(n_inducings))
            .fit(&Dataset::new(xt.clone(), yt.clone()))
            .expect("GP fitted");
    }
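
    // A minimal smoke-test sketch added for illustration (not part of the
    // original test suite): it only checks that `predict` and `predict_var`
    // return one value per query point.
    #[test]
    fn test_sgp_predict_shapes() {
        let mut rng = Xoshiro256Plus::seed_from_u64(7);
        let (xt, yt) = make_test_data(100, 0.01, &mut rng);
        let sgp = SparseKriging::params(Inducings::Randomized(20))
            .seed(Some(7))
            .fit(&Dataset::new(xt, yt))
            .expect("SGP fitted");
        let x = Array::linspace(-1., 1., 17).insert_axis(Axis(1));
        assert_eq!(sgp.predict(&x).unwrap().len(), x.nrows());
        assert_eq!(sgp.predict_var(&x).unwrap().len(), x.nrows());
    }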
}