egobox_gp/algorithm.rs

use crate::errors::{GpError, Result};
use crate::mean_models::*;
use crate::optimization::{CobylaParams, optimize_params, prepare_multistart};
use crate::parameters::{GpParams, GpValidParams};
use crate::utils::{DiffMatrix, NormalizedData, pairwise_differences};
use crate::{ThetaTuning, correlation_models::*};

use linfa::dataset::{WithLapack, WithoutLapack};
use linfa::prelude::{Dataset, DatasetBase, Fit, Float, PredictInplace};

#[cfg(not(feature = "blas"))]
use linfa_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*};
#[cfg(feature = "blas")]
use log::warn;
#[cfg(feature = "blas")]
use ndarray_linalg::{cholesky::*, eigh::*, qr::*, svd::*, triangular::*};

use linfa_pls::PlsRegression;
use ndarray::{Array, Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, Zip};

use ndarray_rand::RandomExt;
use ndarray_rand::rand_distr::Normal;
use ndarray_stats::QuantileExt;

use log::debug;
use rayon::prelude::*;
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::time::Instant;

/// Default number of multistarts for hyperparameters optimization
pub const GP_OPTIM_N_START: usize = 10;
/// Minimum number of function evaluations for the COBYLA optimizer
pub const GP_COBYLA_MIN_EVAL: usize = 25;
/// Maximum number of function evaluations for the COBYLA optimizer
pub const GP_COBYLA_MAX_EVAL: usize = 1000;

/// Internal parameters computed by the GP during training,
/// used later on in prediction computations
#[derive(Default, Debug)]
#[cfg_attr(
    feature = "serializable",
    derive(Serialize, Deserialize),
    serde(bound(deserialize = "F: Deserialize<'de>"))
)]
pub(crate) struct GpInnerParams<F: Float> {
    /// Gaussian process variance
    sigma2: F,
    /// Generalized least-squares regression weights for Universal Kriging or given beta0 for Ordinary Kriging
    beta: Array2<F>,
    /// Gaussian Process weights
    gamma: Array2<F>,
    /// Cholesky decomposition of the correlation matrix \[R\]
    r_chol: Array2<F>,
    /// Solution of the linear equation system : \[R\] x Ft = y
    ft: Array2<F>,
    /// R upper triangle matrix of QR decomposition of the matrix Ft
    ft_qr_r: Array2<F>,
}

impl<F: Float> Clone for GpInnerParams<F> {
    fn clone(&self) -> Self {
        Self {
            sigma2: self.sigma2.to_owned(),
            beta: self.beta.to_owned(),
            gamma: self.gamma.to_owned(),
            r_chol: self.r_chol.to_owned(),
            ft: self.ft.to_owned(),
            ft_qr_r: self.ft_qr_r.to_owned(),
        }
    }
}

/// A GP regression is an interpolation method where the
/// interpolated values are modeled by a Gaussian process with a mean,
/// governed by a prior covariance kernel, which depends on some
/// parameters to be determined.
///
/// The interpolated output is modeled as a stochastic process as follows:
///
/// `Y(x) = mu(x) + Z(x)`
///
/// where:
/// * `mu(x)` is the trend, i.e. the mean of the Gaussian process
/// * `Z(x)` is a realization of a stochastic Gaussian process ~ `Normal(0, sigma^2)`
///
/// which in turn is written as:
///
/// `Y(x) = betas.regr(x) + sigma^2*corr(x, x')`
///
/// where:
/// * `betas` is a vector of linear regression parameters to be determined
/// * `regr(x)` is a vector of polynomial basis functions
/// * `sigma^2` is the process variance
/// * `corr(x, x')` is a correlation function which depends on `distance(x, x')`
///   and a set of unknown parameters `thetas` to be determined.
///
/// # Implementation
///
/// * Based on [ndarray](https://github.com/rust-ndarray/ndarray)
///   and [linfa](https://github.com/rust-ml/linfa) and strives to follow [linfa guidelines](https://github.com/rust-ml/linfa/blob/master/CONTRIBUTE.md)
/// * The GP mean model can be constant, linear or quadratic
/// * The GP correlation model can be built with the following kernels: squared exponential, absolute exponential, matern 3/2, matern 5/2
///   cf. [SMT Kriging](https://smt.readthedocs.io/en/latest/_src_docs/surrogate_models/krg.html)
/// * For high dimensional problems, the classic GP algorithm does not perform well as
///   it depends on the inversion of a correlation (n, n) matrix which is an O(n^3) operation.
///   To work around this problem the library implements dimension reduction using
///   the Partial Least Squares method on top of the Kriging method, also known as the KPLS algorithm (see Reference)
/// * GP models can be saved and loaded using [serde](https://serde.rs/).
///   See the `serializable` feature section below.
///
/// # Features
///
/// ## serializable
///
/// The `serializable` feature enables the serialization of GP models using the [`serde crate`](https://serde.rs/).
///
/// ## blas
///
/// The `blas` feature enables the use of the BLAS/LAPACK linear algebra backend available with [`ndarray-linalg`](https://github.com/rust-ndarray/ndarray-linalg).
///
/// # Example
///
/// ```no_run
/// use egobox_gp::{correlation_models::*, mean_models::*, GaussianProcess};
/// use linfa::prelude::*;
/// use ndarray::{arr2, concatenate, Array, Array1, Array2, Axis};
///
/// // one-dimensional test function to approximate
/// fn xsinx(x: &Array2<f64>) -> Array1<f64> {
///     ((x - 3.5) * ((x - 3.5) / std::f64::consts::PI).mapv(|v| v.sin())).remove_axis(Axis(1))
/// }
///
/// // training data
/// let xt = arr2(&[[0.0], [5.0], [10.0], [15.0], [18.0], [20.0], [25.0]]);
/// let yt = xsinx(&xt);
///
/// // GP with constant mean model and squared exponential correlation model
/// // i.e. Ordinary Kriging model
/// let kriging = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
///                 ConstantMean::default(),
///                 SquaredExponentialCorr::default())
///                 .fit(&Dataset::new(xt, yt))
///                 .expect("Kriging trained");
///
/// // Use trained model for making predictions
/// let xtest = Array::linspace(0., 25., 26).insert_axis(Axis(1));
/// let ytest = xsinx(&xtest);
///
/// let ypred = kriging.predict(&xtest).expect("Kriging prediction");
/// let yvariances = kriging.predict_var(&xtest).expect("Kriging prediction");
/// ```
///
/// # Reference:
///
/// Mohamed Amine Bouhlel, John T. Hwang, Nathalie Bartoli, Rémi Lafage, Joseph Morlier, Joaquim R.R.A. Martins,
/// [A Python surrogate modeling framework with derivatives](https://doi.org/10.1016/j.advengsoft.2019.03.005),
/// Advances in Engineering Software, Volume 135, 2019, 102662, ISSN 0965-9978.
///
/// Bouhlel, Mohamed Amine, et al. [Improving kriging surrogates of high-dimensional design
/// models by Partial Least Squares dimension reduction](https://hal.archives-ouvertes.fr/hal-01232938/document)
/// Structural and Multidisciplinary Optimization 53.5 (2016): 935-952.
///
164///
165#[derive(Debug)]
166#[cfg_attr(
167    feature = "serializable",
168    derive(Serialize, Deserialize),
169    serde(bound(
170        serialize = "F: Serialize, Mean: Serialize, Corr: Serialize",
171        deserialize = "F: Deserialize<'de>, Mean: Deserialize<'de>, Corr: Deserialize<'de>"
172    ))
173)]
174pub struct GaussianProcess<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> {
175    /// Parameter of the autocorrelation model equal to the inverse of length scale
176    theta: Array1<F>,
177    /// Reduced likelihood value (result from internal optimization)
178    /// Maybe used to compare different trained models
179    likelihood: F,
180    /// Gaussian process internal fitted params
181    inner_params: GpInnerParams<F>,
182    /// Weights in case of KPLS dimension reduction coming from PLS regression (orig_dim, kpls_dim)
183    w_star: Array2<F>,
184    /// Training inputs
185    xt_norm: NormalizedData<F>,
186    /// Training outputs
187    yt_norm: NormalizedData<F>,
188    /// Training dataset (input, output)
189    pub(crate) training_data: (Array2<F>, Array1<F>),
190    /// Parameters used to fit this model
191    pub(crate) params: GpValidParams<F, Mean, Corr>,
192}
193
194pub(crate) enum GpSamplingMethod {
195    Cholesky,
196    EigenValues,
197}
198
/// Kriging as a GP special case when using a constant mean and the squared exponential correlation
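///
/// A minimal usage sketch of this alias; the data and the fitting call mirror
/// the [`GaussianProcess`] example above (assuming the alias is re-exported at
/// the crate root):
///
/// ```no_run
/// use egobox_gp::Kriging;
/// use linfa::prelude::*;
/// use ndarray::{arr1, arr2};
///
/// let xt = arr2(&[[0.0], [5.0], [10.0], [15.0], [20.0]]);
/// let yt = arr1(&[0.0, 1.0, 1.5, 0.9, 1.0]);
/// // Same as GaussianProcess::params(ConstantMean(), SquaredExponentialCorr())
/// let kriging = Kriging::<f64>::params()
///     .fit(&Dataset::new(xt, yt))
///     .expect("Kriging trained");
/// ```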
pub type Kriging<F> = GpParams<F, ConstantMean, SquaredExponentialCorr>;

impl<F: Float> Kriging<F> {
    /// Kriging parameters constructor
    pub fn params() -> GpParams<F, ConstantMean, SquaredExponentialCorr> {
        GpParams::new(ConstantMean(), SquaredExponentialCorr())
    }
}

impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> Clone
    for GaussianProcess<F, Mean, Corr>
{
    fn clone(&self) -> Self {
        Self {
            theta: self.theta.to_owned(),
            likelihood: self.likelihood,
            inner_params: self.inner_params.clone(),
            w_star: self.w_star.to_owned(),
            xt_norm: self.xt_norm.clone(),
            yt_norm: self.yt_norm.clone(),
            training_data: self.training_data.clone(),
            params: self.params.clone(),
        }
    }
}

impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> fmt::Display
    for GaussianProcess<F, Mean, Corr>
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "GP(mean={}, corr={}, theta={}, variance={}, likelihood={})",
            self.params.mean,
            self.params.corr,
            self.theta,
            self.inner_params.sigma2,
            self.likelihood,
        )
    }
}

impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr> {
    /// Gp parameters constructor
    pub fn params<NewMean: RegressionModel<F>, NewCorr: CorrelationModel<F>>(
        mean: NewMean,
        corr: NewCorr,
    ) -> GpParams<F, NewMean, NewCorr> {
        GpParams::new(mean, corr)
    }

    /// Predict output values at n given `x` points of nx components specified as a (n, nx) matrix.
    /// Returns n scalar output values as a vector (n,).
    pub fn predict(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Result<Array1<F>> {
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
        // Compute the mean term at x
        let f = self.params.mean.coefs(&xnorm);
        // Compute the correlation term at x
        let corr = self._compute_correlation(&xnorm);
        // Scaled predictor
        let y_ = &f.dot(&self.inner_params.beta) + &corr.dot(&self.inner_params.gamma);
        // Predictor
        Ok((&y_ * &self.yt_norm.std + &self.yt_norm.mean).remove_axis(Axis(1)))
    }

    /// Predict variance values at n given `x` points of nx components specified as a (n, nx) matrix.
    /// Returns n variance values as (n,) column vector.
    pub fn predict_var(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Result<Array1<F>> {
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
        let corr = self._compute_correlation(&xnorm);
        let (rt, u) = self._compute_rt_u(&xnorm, &corr);

        let mut mse = Array::ones(rt.ncols()) - rt.mapv(|v| v * v).sum_axis(Axis(0))
            + u.mapv(|v: F| v * v).sum_axis(Axis(0));
        mse.mapv_inplace(|v| self.inner_params.sigma2 * v);

        // Mean Squared Error might be slightly negative depending on
        // machine precision: set to zero in that case
        Ok(mse.mapv(|v| if v < F::zero() { F::zero() } else { F::cast(v) }))
    }

    /// Predict both output values and variance at n given `x` points of nx components
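    ///
    /// A minimal sketch: this is equivalent to calling [`predict`](Self::predict)
    /// and [`predict_var`](Self::predict_var) separately while sharing intermediate
    /// computations (training setup elided):
    ///
    /// ```no_run
    /// # use egobox_gp::{correlation_models::*, mean_models::*, GaussianProcess};
    /// # use linfa::prelude::*;
    /// # use ndarray::{arr1, arr2, array};
    /// # let xt = arr2(&[[0.0], [5.0], [10.0], [15.0], [20.0]]);
    /// # let yt = arr1(&[0.0, 1.0, 1.5, 0.9, 1.0]);
    /// # let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
    /// #     ConstantMean::default(), SquaredExponentialCorr::default())
    /// #     .fit(&Dataset::new(xt, yt)).expect("GP fitted");
    /// let (vals, vars) = gp.predict_valvar(&array![[7.5], [12.5]]).expect("GP prediction");
    /// assert_eq!((vals.len(), vars.len()), (2, 2));
    /// ```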
    pub fn predict_valvar(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
    ) -> Result<(Array1<F>, Array1<F>)> {
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
        // Compute the mean term at x
        let f = self.params.mean.coefs(&xnorm);
        // Compute the correlation term at x
        let corr = self._compute_correlation(&xnorm);
        // Scaled predictor
        let y_ = &f.dot(&self.inner_params.beta) + &corr.dot(&self.inner_params.gamma);
        // Predictor
        let yp = (&y_ * &self.yt_norm.std + &self.yt_norm.mean).remove_axis(Axis(1));

        let (rt, u) = self._compute_rt_u(&xnorm, &corr);

        let mut mse = Array::ones(rt.ncols()) - rt.mapv(|v| v * v).sum_axis(Axis(0))
            + u.mapv(|v: F| v * v).sum_axis(Axis(0));
        mse.mapv_inplace(|v| self.inner_params.sigma2 * v);

        // Mean Squared Error might be slightly negative depending on
        // machine precision: set to zero in that case
        let vmse = mse.mapv(|v| if v < F::zero() { F::zero() } else { F::cast(v) });

        Ok((yp, vmse))
    }

    /// Compute covariance matrix given x points specified as a (n, nx) matrix
    fn _compute_covariance(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
        let corr = self._compute_correlation(&xnorm);
        let (rt, u) = self._compute_rt_u(&xnorm, &corr);

        let cross_dx = pairwise_differences(&xnorm, &xnorm);
        let k = self
            .params
            .corr
            .rval_from_distances(&cross_dx, &self.theta, &self.w_star);
        let k = k
            .into_shape_with_order((xnorm.nrows(), xnorm.nrows()))
            .unwrap();

        // let cov_matrix =
        //     &array![self.inner_params.sigma2] * (k - rt.t().to_owned().dot(&rt) + u.t().dot(&u));
        let mut cov_matrix = k - rt.t().to_owned().dot(&rt) + u.t().dot(&u);
        cov_matrix.mapv_inplace(|v| self.inner_params.sigma2 * v);
        cov_matrix
    }

    /// Compute the `rt` and `u` matrices from normalized x points and their correlation with the training set.
    /// This method factorizes computations shared by the variance and covariance matrix computations
    fn _compute_rt_u(
        &self,
        xnorm: &ArrayBase<impl Data<Elem = F>, Ix2>,
        corr: &ArrayBase<impl Data<Elem = F>, Ix2>,
    ) -> (Array2<F>, Array2<F>) {
        let inners = &self.inner_params;

        let corr_t = corr.t().to_owned();
        #[cfg(feature = "blas")]
        let rt = inners
            .r_chol
            .to_owned()
            .with_lapack()
            .solve_triangular(UPLO::Lower, Diag::NonUnit, &corr_t.with_lapack())
            .unwrap()
            .without_lapack();
        #[cfg(not(feature = "blas"))]
        let rt = inners
            .r_chol
            .solve_triangular(&corr_t, UPLO::Lower)
            .unwrap();

        let rhs = inners.ft.t().dot(&rt) - self.params.mean.coefs(xnorm).t();
        #[cfg(feature = "blas")]
        let u = inners
            .ft_qr_r
            .to_owned()
            .t()
            .with_lapack()
            .solve_triangular(UPLO::Upper, Diag::NonUnit, &rhs.with_lapack())
            .unwrap()
            .without_lapack();
        #[cfg(not(feature = "blas"))]
        let u = inners
            .ft_qr_r
            .t()
            .solve_triangular(&rhs, UPLO::Lower)
            .unwrap();
        (rt, u)
    }

    /// Compute correlation matrix given x points specified as a (n, nx) matrix
    fn _compute_correlation(&self, xnorm: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
        // Get pairwise componentwise L1-distances to the input training set
        let dx = pairwise_differences(xnorm, &self.xt_norm.data);
        // Compute the correlation function
        let r = self
            .params
            .corr
            .rval_from_distances(&dx, &self.theta, &self.w_star);
        let n_obs = xnorm.nrows();
        let nt = self.xt_norm.data.nrows();
        r.into_shape_with_order((n_obs, nt)).unwrap().to_owned()
    }

    /// Sample the gaussian process for `n_traj` trajectories using cholesky decomposition
    pub fn sample_chol(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>, n_traj: usize) -> Array2<F> {
        self._sample(x, n_traj, GpSamplingMethod::Cholesky)
    }

    /// Sample the gaussian process for `n_traj` trajectories using eigenvalues decomposition
    pub fn sample_eig(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>, n_traj: usize) -> Array2<F> {
        self._sample(x, n_traj, GpSamplingMethod::EigenValues)
    }

    /// Sample the gaussian process for `n_traj` trajectories using eigenvalues decomposition (alias of `sample_eig`)
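    ///
    /// A hedged sketch of drawing trajectories (the points and trajectory count
    /// are arbitrary; training setup elided):
    ///
    /// ```no_run
    /// # use egobox_gp::{correlation_models::*, mean_models::*, GaussianProcess};
    /// # use linfa::prelude::*;
    /// # use ndarray::{arr1, arr2, Array, Axis};
    /// # let xt = arr2(&[[0.0], [5.0], [10.0], [15.0], [20.0]]);
    /// # let yt = arr1(&[0.0, 1.0, 1.5, 0.9, 1.0]);
    /// # let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
    /// #     ConstantMean::default(), SquaredExponentialCorr::default())
    /// #     .fit(&Dataset::new(xt, yt)).expect("GP fitted");
    /// let x = Array::linspace(0., 20., 50).insert_axis(Axis(1));
    /// // 10 trajectories sampled at the 50 x locations: a (50, 10) matrix
    /// let trajs = gp.sample(&x, 10);
    /// assert_eq!(trajs.dim(), (50, 10));
    /// ```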
    pub fn sample(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>, n_traj: usize) -> Array2<F> {
        self.sample_eig(x, n_traj)
    }

    /// Sample the gaussian process for `n_traj` trajectories using either the
    /// cholesky or the eigenvalues decomposition of the conditioned covariance matrix.
    /// The latter is recommended, as the cholesky decomposition suffers from the occurrence
    /// of ill-conditioned matrices when the number of x locations increases.
    fn _sample(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
        n_traj: usize,
        method: GpSamplingMethod,
    ) -> Array2<F> {
        let mean = self.predict(x).unwrap();
        let cov = self._compute_covariance(x);
        sample(x, mean.insert_axis(Axis(1)), cov, n_traj, method)
    }

    /// Retrieve optimized hyperparameters theta
    pub fn theta(&self) -> &Array1<F> {
        &self.theta
    }

    /// Estimated variance
    pub fn variance(&self) -> F {
        self.inner_params.sigma2
    }

    /// Retrieve reduced likelihood value
    pub fn likelihood(&self) -> F {
        self.likelihood
    }

    /// Retrieve the number of PLS components (1 <= n <= input dimension)
    pub fn kpls_dim(&self) -> Option<usize> {
        if self.w_star.ncols() < self.xt_norm.ncols() {
            Some(self.w_star.ncols())
        } else {
            None
        }
    }

    /// Retrieve input and output dimensions
    pub fn dims(&self) -> (usize, usize) {
        (self.xt_norm.ncols(), self.yt_norm.ncols())
    }

    /// Predict derivatives of the output prediction
    /// wrt the kxth component at a set of n points `x` specified as a (n, nx) matrix where x has nx components.
    pub fn predict_kth_derivatives(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
        kx: usize,
    ) -> Array1<F> {
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
        let corr = self._compute_correlation(&xnorm);

        let beta = &self.inner_params.beta;
        let gamma = &self.inner_params.gamma;

        let df_dx_kx = if self.inner_params.beta.nrows() <= 1 + self.xt_norm.data.ncols() {
            // for constant or linear mean: df/dx is constant ([0] or [1]) for all x, so use x[0] to get it
            let df = self.params.mean.jac(&x.row(0));
            let df_dx = df.t().row(kx).dot(beta);
            df_dx.broadcast((x.nrows(), 1)).unwrap().to_owned()
        } else {
            // for quadratic df/dx really depends on x
            let mut dfdx = Array2::zeros((x.nrows(), 1));
            Zip::from(dfdx.rows_mut())
                .and(xnorm.rows())
                .for_each(|mut dfxi, xi| {
                    let df = self.params.mean.jac(&xi);
                    let df_dx = (df.t().row(kx)).dot(beta);
                    dfxi.assign(&df_dx);
                });
            dfdx
        };

        let nr = x.nrows();
        let nc = self.xt_norm.data.nrows();
        let d_dx_1 = &xnorm
            .column(kx)
            .to_owned()
            .into_shape_with_order((nr, 1))
            .unwrap()
            .broadcast((nr, nc))
            .unwrap()
            .to_owned();

        let d_dx_2 = self
            .xt_norm
            .data
            .column(kx)
            .to_owned()
            .as_standard_layout()
            .into_shape_with_order((1, nc))
            .unwrap()
            .to_owned();

        let d_dx = d_dx_1 - d_dx_2;

        // Get pairwise componentwise L1-distances to the input training set
        let theta = &self.theta.to_owned();
        let d_dx_corr = d_dx * corr;

        // (df(xnew)/dx).beta + (dr(xnew)/dx).R^-1(ytrain - f.beta)
        // gamma = R^-1(ytrain - f.beta)
        // Warning: squared exponential only
        let res = (df_dx_kx - d_dx_corr.dot(gamma).map(|v| F::cast(2.) * theta[kx] * *v))
            * self.yt_norm.std[0]
            / self.xt_norm.std[kx];
        res.column(0).to_owned()
    }

    /// Predict derivatives at a set of points `x` specified as a (n, nx) matrix where x has nx components.
    /// Returns a (n, nx) matrix containing the output derivatives at x wrt each of the nx components
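    ///
    /// A hedged finite-difference check (the step size and tolerance here are
    /// illustrative only; training setup elided):
    ///
    /// ```no_run
    /// # use egobox_gp::{correlation_models::*, mean_models::*, GaussianProcess};
    /// # use linfa::prelude::*;
    /// # use ndarray::{arr1, arr2, array};
    /// # let xt = arr2(&[[0.0], [5.0], [10.0], [15.0], [20.0]]);
    /// # let yt = arr1(&[0.0, 1.0, 1.5, 0.9, 1.0]);
    /// # let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
    /// #     ConstantMean::default(), SquaredExponentialCorr::default())
    /// #     .fit(&Dataset::new(xt, yt)).expect("GP fitted");
    /// let h = 1e-6;
    /// let grad = gp.predict_gradients(&array![[7.5]])[[0, 0]];
    /// let fd = (gp.predict(&array![[7.5 + h]]).unwrap()[0]
    ///     - gp.predict(&array![[7.5 - h]]).unwrap()[0]) / (2. * h);
    /// assert!((grad - fd).abs() < 1e-3);
    /// ```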
    pub fn predict_gradients(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
        let mut drv = Array2::<F>::zeros((x.nrows(), self.xt_norm.data.ncols()));
        Zip::from(drv.rows_mut())
            .and(x.rows())
            .for_each(|mut row, xi| {
                let pred = self.predict_jacobian(&xi);
                row.assign(&pred.column(0));
            });
        drv
    }

    /// Predict the gradient at a given x point.
    /// Note: the output is one-dimensional; it is named jacobian as the result is returned as a one-column matrix
    fn predict_jacobian(&self, x: &ArrayBase<impl Data<Elem = F>, Ix1>) -> Array2<F> {
        let xx = x.to_owned().insert_axis(Axis(0));
        let mut jac = Array2::zeros((xx.ncols(), 1));

        let xnorm = (xx - &self.xt_norm.mean) / &self.xt_norm.std;

        let beta = &self.inner_params.beta;
        let gamma = &self.inner_params.gamma;

        let df = self.params.mean.jac(&xnorm.row(0));
        let df_dx = df.t().dot(beta);

        let dr = self
            .params
            .corr
            .jac(&xnorm.row(0), &self.xt_norm.data, &self.theta, &self.w_star);

        let dr_dx = df_dx + dr.t().dot(gamma);
        Zip::from(jac.rows_mut())
            .and(dr_dx.rows())
            .and(&self.xt_norm.std)
            .for_each(|mut jc, dr_i, std_i| {
                let jc_i = dr_i.map(|v| *v * self.yt_norm.std[0] / *std_i);
                jc.assign(&jc_i)
            });

        jac
    }

    /// Predict variance derivatives at a point `x` specified as a (nx,) vector where x has nx components.
    /// Returns a (nx,) vector containing variance derivatives at `x` wrt each nx components
    #[cfg(not(feature = "blas"))]
    pub fn predict_var_gradients_single(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
    ) -> Array1<F> {
        let x = &(x.to_owned().insert_axis(Axis(0)));
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
        let sigma2 = self.inner_params.sigma2;
        let r_chol = &self.inner_params.r_chol;

        let (r, dr) = self.params.corr.rval_with_jac(
            &xnorm.row(0),
            &self.xt_norm.data,
            &self.theta,
            &self.w_star,
        );

        // rho1 = Rc^-1 . r(x, X)
        let rho1 = r_chol.solve_triangular(&r, UPLO::Lower).unwrap();

        // inv_kr = Rc^t^-1 . Rc^-1 . r(x, X) = R^-1 . r(x, X)
        let inv_kr = r_chol.t().solve_triangular(&rho1, UPLO::Upper).unwrap();

        // p1 = ((dr(x, X)/dx)^t . R^-1 . r(x, X))^t = ((R^-1 . r(x, X))^t . dr(x, X)/dx) = r(x, X)^t . R^-1 . dr(x, X)/dx = p2
        // let p1 = dr.t().dot(&inv_kr).t().to_owned();

        // p2 = ((R^-1 . r(x, X))^t . dr(x, X)/dx)^t = dr(x, X)/dx)^t . R^-1 . r(x, X) = p1
        let p2 = inv_kr.t().dot(&dr);

        let f_x = self.params.mean.coefs(&xnorm).t().to_owned();
        let f_mean = self.params.mean.coefs(&self.xt_norm.data);

        // rho2 = Rc^-1 . F(X)
        let rho2 = r_chol.solve_triangular(&f_mean, UPLO::Lower).unwrap();
        // inv_kf = Rc^-1^t . Rc^-1 . F(X) = R^-1 . F(X)
        let inv_kf = r_chol.t().solve_triangular(&rho2, UPLO::Upper).unwrap();

        // A = f(x)^t - r(x, X)^t . R^-1 . F(X)   -> (1 x m)
        let a_mat = f_x.t().to_owned() - r.t().dot(&inv_kf);

        // B = F(X)^t . R^-1 . F(X)
        let b_mat = f_mean.t().dot(&inv_kf);
        // rho3 = Bc
        let rho3 = b_mat.cholesky().unwrap();
        // inv_bat = Bc^-1 . A^t
        let inv_bat = rho3.solve_triangular(&a_mat.t(), UPLO::Lower).unwrap();
        // D = Bc^t-1 . Bc^-1 . A^t = B^-1 . A^t
        let d_mat = rho3.t().solve_triangular(&inv_bat, UPLO::Upper).unwrap();

        let df = self.params.mean.jac(&xnorm.row(0));

        // dA/dx = df(x)/dx^t - dr(x, X)/dx^t . R^-1 . F
        let d_a = df.t().to_owned() - dr.t().dot(&inv_kf);

        // p3 = (dA/dx . B^-1 . A^t)^t = A . B^-1 . dA/dx^t
        // let p3 = d_a.dot(&d_mat).t().to_owned();

        // p4 = (B^-1 . A)^t . dA/dx^t = A^t . B^-1 . dA/dx^t = p3
        let p4 = d_mat.t().dot(&d_a.t());
        let two = F::cast(2.);
        let prime = (p4 - p2).mapv(|v| two * v);

        let x_std = &self.xt_norm.std;
        let dvar = (prime / x_std).mapv(|v| v * sigma2);
        dvar.row(0).into_owned()
    }

    /// See non blas version
    #[cfg(feature = "blas")]
    pub fn predict_var_gradients_single(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
    ) -> Array1<F> {
        let x = &(x.to_owned().insert_axis(Axis(0)));
        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;

        let dx = pairwise_differences(&xnorm, &self.xt_norm.data);

        let sigma2 = self.inner_params.sigma2;
        let r_chol = &self.inner_params.r_chol.to_owned().with_lapack();

        let r = self
            .params
            .corr
            .rval_from_distances(&dx, &self.theta, &self.w_star)
            .with_lapack();
        let dr = self
            .params
            .corr
            .jac(&xnorm.row(0), &self.xt_norm.data, &self.theta, &self.w_star)
            .with_lapack();

        let rho1 = r_chol
            .solve_triangular(UPLO::Lower, Diag::NonUnit, &r)
            .unwrap();
        let inv_kr = r_chol
            .t()
            .solve_triangular(UPLO::Upper, Diag::NonUnit, &rho1)
            .unwrap();

        // let p1 = dr.t().dot(&inv_kr).t().to_owned();

        let p2 = inv_kr.t().dot(&dr);

        let f_x = self.params.mean.coefs(x).t().to_owned();
        let f_mean = self.params.mean.coefs(&self.xt_norm.data).with_lapack();

        let rho2 = r_chol
            .solve_triangular(UPLO::Lower, Diag::NonUnit, &f_mean)
            .unwrap();
        let inv_kf = r_chol
            .t()
            .solve_triangular(UPLO::Upper, Diag::NonUnit, &rho2)
            .unwrap();

        let a_mat = f_x.t().to_owned().with_lapack() - r.t().dot(&inv_kf);

        let b_mat = f_mean.t().dot(&inv_kf);

        let d_mat = match b_mat.cholesky(UPLO::Lower) {
            Ok(rho3) => {
                let inv_bat = rho3
                    .solve_triangular(UPLO::Upper, Diag::NonUnit, &a_mat.t().to_owned())
                    .unwrap();
                rho3.t()
                    .solve_triangular(UPLO::Upper, Diag::NonUnit, &inv_bat)
                    .unwrap()
            }
            Err(_) => {
                warn!("Cholesky decomposition error during variance derivatives computation");
                Array2::zeros((b_mat.nrows(), b_mat.ncols()))
            }
        };

        let df = self.params.mean.jac(&xnorm.row(0)).with_lapack();

        let d_a = df.t().to_owned() - dr.t().dot(&inv_kf);
        // let p3 = d_a.dot(&d_mat).t();
        let p4 = d_mat.t().dot(&d_a.t());

        let two = F::cast(2.);
        let prime_t = (p4 - p2).without_lapack().mapv(|v| two * v);

        let x_std = &self.xt_norm.std;
        let dvar = (prime_t / x_std).mapv(|v| v * sigma2);
        dvar.row(0).into_owned()
    }

    /// Predict variance derivatives at a set of points `x` specified as a (n, nx) matrix where x has nx components.
    /// Returns a (n, nx) matrix containing variance derivatives at `x` wrt each nx components
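    ///
    /// A minimal sketch (same conventions as `predict_gradients`, applied to
    /// the variance; training setup elided):
    ///
    /// ```no_run
    /// # use egobox_gp::{correlation_models::*, mean_models::*, GaussianProcess};
    /// # use linfa::prelude::*;
    /// # use ndarray::{arr1, arr2, array};
    /// # let xt = arr2(&[[0.0], [5.0], [10.0], [15.0], [20.0]]);
    /// # let yt = arr1(&[0.0, 1.0, 1.5, 0.9, 1.0]);
    /// # let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
    /// #     ConstantMean::default(), SquaredExponentialCorr::default())
    /// #     .fit(&Dataset::new(xt, yt)).expect("GP fitted");
    /// let dvar = gp.predict_var_gradients(&array![[7.5], [12.5]]);
    /// assert_eq!(dvar.dim(), (2, 1)); // (n, nx)
    /// ```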
    pub fn predict_var_gradients(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
        let mut derivs = Array::zeros((x.nrows(), x.ncols()));
        Zip::from(derivs.rows_mut())
            .and(x.rows())
            .for_each(|mut der, x| der.assign(&self.predict_var_gradients_single(&x)));
        derivs
    }

    /// Predict both value and variance gradients at a set of points `x` specified as a (n, nx) matrix
    /// where x has nx components.
    pub fn predict_valvar_gradients(
        &self,
        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
    ) -> (Array2<F>, Array2<F>) {
        let mut val_derivs = Array::zeros((x.nrows(), x.ncols()));
        let mut var_derivs = Array::zeros((x.nrows(), x.ncols()));
        Zip::from(val_derivs.rows_mut())
            .and(var_derivs.rows_mut())
            .and(x.rows())
            .for_each(|mut val_der, mut var_der, x| {
                val_der.assign(&self.predict_jacobian(&x).column(0));
                var_der.assign(&self.predict_var_gradients_single(&x));
            });
        (val_derivs, var_derivs)
    }
}

impl<F, D, Mean, Corr> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
    for GaussianProcess<F, Mean, Corr>
where
    F: Float,
    D: Data<Elem = F>,
    Mean: RegressionModel<F>,
    Corr: CorrelationModel<F>,
{
    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array1<F>) {
        assert_eq!(
            x.nrows(),
            y.len(),
            "The number of data points must match the number of output targets."
        );

        let values = self.predict(x).expect("GP Prediction");
        *y = values;
    }

    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array1<F> {
        Array1::zeros((x.nrows(),))
    }
}

/// Gaussian process adaptor to implement the `linfa::Predict` trait for variance prediction.
#[allow(dead_code)]
pub struct GpVariancePredictor<'a, F, Mean, Corr>(&'a GaussianProcess<F, Mean, Corr>)
where
    F: Float,
    Mean: RegressionModel<F>,
    Corr: CorrelationModel<F>;

impl<F, D, Mean, Corr> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
    for GpVariancePredictor<'_, F, Mean, Corr>
where
    F: Float,
    D: Data<Elem = F>,
    Mean: RegressionModel<F>,
    Corr: CorrelationModel<F>,
{
    fn predict_inplace(&self, x: &ArrayBase<D, Ix2>, y: &mut Array1<F>) {
        assert_eq!(
            x.nrows(),
            y.len(),
            "The number of data points must match the number of output targets."
        );

        let values = self.0.predict_var(x).expect("GP Prediction");
        *y = values;
    }

    fn default_target(&self, x: &ArrayBase<D, Ix2>) -> Array1<F> {
        Array1::zeros(x.nrows())
    }
}

impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem = F>>
    Fit<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>, GpError> for GpValidParams<F, Mean, Corr>
{
    type Object = GaussianProcess<F, Mean, Corr>;

    /// Fit GP parameters using maximum likelihood
    fn fit(
        &self,
        dataset: &DatasetBase<ArrayBase<D, Ix2>, ArrayBase<D, Ix1>>,
    ) -> Result<Self::Object> {
        let x = dataset.records();
        let y = dataset.targets().to_owned().insert_axis(Axis(1));

        if let Some(d) = self.kpls_dim()
            && *d > x.ncols()
        {
            return Err(GpError::InvalidValueError(format!(
                "Dimension reduction {} should be smaller than actual \
                    training input dimensions {}",
                d,
                x.ncols()
            )));
        }

        let dim = if let Some(n_components) = self.kpls_dim() {
            *n_components
        } else {
            x.ncols()
        };

        let (x, y, active, init) = match self.theta_tuning() {
            ThetaTuning::Fixed(init) | ThetaTuning::Full { init, bounds: _ } => (
                x.to_owned(),
                y.to_owned(),
                (0..dim).collect::<Vec<_>>(),
                init,
            ),
            ThetaTuning::Partial {
                init,
                bounds: _,
                active,
            } => (x.to_owned(), y.to_owned(), active.to_vec(), init),
        };
        // Initial guess for theta
        let theta0_dim = init.len();
        let theta0 = if theta0_dim == 1 {
            Array1::from_elem(dim, init[0])
        } else if theta0_dim == dim {
            init.to_owned()
        } else {
            panic!(
                "Initial guess for theta should be either 1-dim or dim of xtrain (w_star.ncols()), got {theta0_dim}"
            )
        };
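        // e.g. with dim == 3, a one-element init like array![0.1] broadcasts theta0 to [0.1, 0.1, 0.1]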

        let xtrain = NormalizedData::new(&x);
        let ytrain = NormalizedData::new(&y);

        let mut w_star = Array2::eye(x.ncols());
        if let Some(n_components) = self.kpls_dim() {
            let ds = Dataset::new(x.to_owned(), y.to_owned());
            w_star = PlsRegression::params(*n_components).fit(&ds).map_or_else(
                |e| match e {
                    linfa_pls::PlsError::PowerMethodConstantResidualError() => {
                        Ok(Array2::zeros((x.ncols(), *n_components)))
                    }
                    err => Err(err),
                },
                |v| Ok(v.rotations().0.to_owned()),
            )?;
        };
        let x_distances = DiffMatrix::new(&xtrain.data);
        let sums = x_distances
            .d
            .mapv(|v| num_traits::float::Float::abs(v))
            .sum_axis(Axis(1));
        if *sums.min().unwrap() == F::zero() {
            println!(
                "Warning: multiple x input samples have the same value (at least one duplicated row)."
            );
        }
        let fx = self.mean().coefs(&xtrain.data);

        let opt_params = match self.theta_tuning() {
            ThetaTuning::Fixed(init) => {
                // Easy path: no optimization
                init.to_owned()
            }
            ThetaTuning::Full { init: _, bounds }
            | ThetaTuning::Partial {
                init: _,
                bounds,
                active: _,
            } => {
                let base: f64 = 10.;
                let objfn = |x: &[f64], _gradient: Option<&mut [f64]>, _params: &mut ()| -> f64 {
                    let mut theta = theta0.to_owned();
                    let xarr = x.iter().map(|v| base.powf(*v)).collect::<Vec<_>>();
                    std::iter::zip(active.clone(), xarr).for_each(|(i, xi)| theta[i] = F::cast(xi));

                    for v in theta.iter() {
                        // check theta as the optimizer may return nan values
                        if v.is_nan() {
                            // shortcut: return worst value wrt rlf minimization
                            return f64::INFINITY;
                        }
                    }
                    let rxx = self
                        .corr()
                        .rval_from_distances(&x_distances.d, &theta, &w_star);
                    match reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget()) {
                        Ok(r) => unsafe { -(*(&r.0 as *const F as *const f64)) },
                        Err(_) => f64::INFINITY,
                    }
                };

                // Multistart: user theta0 + 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1., 10.
                // let bounds = vec![(F::cast(-6.), F::cast(2.)); theta0.len()];
                let bounds_dim = bounds.len();
                let bounds = if bounds_dim == 1 {
                    vec![bounds[0]; w_star.ncols()]
                } else if bounds_dim == w_star.ncols() {
                    bounds.to_vec()
                } else {
                    panic!(
                        "Bounds for theta should be either 1-dim or dim of xtrain ({}), got {}",
                        w_star.ncols(),
                        bounds_dim
                    )
                };

                // Select init params and bounds wrt activity
                let active_bounds = bounds
                    .iter()
                    .enumerate()
                    .filter(|(i, _)| active.contains(i))
                    .map(|(_, &b)| b)
                    .collect::<Vec<_>>();
                let (theta_inits, bounds) = prepare_multistart(
                    self.n_start(),
                    &theta0.select(Axis(0), &active),
                    &active_bounds,
                );
                debug!("Optimize with multistart theta = {theta_inits:?} and bounds = {bounds:?}");
                let now = Instant::now();
                let opt_params = (0..theta_inits.nrows())
                    .into_par_iter()
                    .map(|i| {
                        optimize_params(
                            objfn,
                            &theta_inits.row(i).to_owned(),
                            &bounds,
                            CobylaParams {
                                maxeval: (10 * theta_inits.ncols())
                                    .clamp(GP_COBYLA_MIN_EVAL, self.max_eval()),
                                ..CobylaParams::default()
                            },
                        )
                    })
                    .reduce(
                        || (f64::INFINITY, Array::ones((theta_inits.ncols(),))),
                        |a, b| if b.0 < a.0 { b } else { a },
                    );
                debug!("elapsed optim = {:?}", now.elapsed().as_millis());
                opt_params.1.mapv(|v| F::cast(base.powf(v)))
            }
        };

        // In case of partial optimization we set only active components
        let opt_params = match self.theta_tuning() {
            ThetaTuning::Fixed(_) | ThetaTuning::Full { init: _, bounds: _ } => opt_params,
            ThetaTuning::Partial {
                init,
                bounds: _,
                active,
            } => {
                let mut opt_theta = init.to_owned();
                std::iter::zip(active.clone(), opt_params)
                    .for_each(|(i, xi)| opt_theta[i] = F::cast(xi));
                opt_theta
            }
        };

        let rxx = self
            .corr()
            .rval_from_distances(&x_distances.d, &opt_params, &w_star);
        let (lkh, inner_params) =
            reduced_likelihood(&fx, rxx, &x_distances, &ytrain, self.nugget())?;
        Ok(GaussianProcess {
            theta: opt_params,
            likelihood: lkh,
            inner_params,
            w_star,
            xt_norm: xtrain,
            yt_norm: ytrain,
            training_data: (x.to_owned(), y.to_owned().remove_axis(Axis(1))),
            params: self.clone(),
        })
    }
}

/// Compute the reduced likelihood function
/// * `fx`: mean factors term at x samples
/// * `rxx`: correlation factors at x samples
/// * `x_distances`: pairwise distances between x samples
/// * `ytrain`: normalized output training values
/// * `nugget`: factor to improve numerical stability
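///
/// A sketch of the quantity being maximized, mirroring the code below
/// (concentrated log-likelihood, constants dropped; L is the Cholesky factor of R):
///
/// ```text
/// sigma2 = ||rho||^2 / n      with rho = L^-1 (y - F beta)
/// rlf    = -n * ( log10(sigma2) + (2/n) * sum(log10(diag(L))) )
/// ```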
#[cfg(not(feature = "blas"))]
fn reduced_likelihood<F: Float>(
    fx: &ArrayBase<impl Data<Elem = F>, Ix2>,
    rxx: ArrayBase<impl Data<Elem = F>, Ix2>,
    x_distances: &DiffMatrix<F>,
    ytrain: &NormalizedData<F>,
    nugget: F,
) -> Result<(F, GpInnerParams<F>)> {
    // Set up R
    let mut r_mx: Array2<F> = Array2::<F>::eye(x_distances.n_obs).mapv(|v| v + v * nugget);
    for (i, ij) in x_distances.d_indices.outer_iter().enumerate() {
        r_mx[[ij[0], ij[1]]] = rxx[[i, 0]];
        r_mx[[ij[1], ij[0]]] = rxx[[i, 0]];
    }
    let fxl = fx;
    // R cholesky decomposition
    let r_chol = r_mx.cholesky()?;
    // Solve generalized least squared problem
    let ft = r_chol.solve_triangular(fxl, UPLO::Lower)?;
    let (ft_qr_q, ft_qr_r) = ft.qr().unwrap().into_decomp();

    // Check whether we have an ill-conditioned problem
    let (_, sv_qr_r, _) = ft_qr_r.svd(false, false).unwrap();
    let cond_ft = sv_qr_r[sv_qr_r.len() - 1] / sv_qr_r[0];
    if F::cast(cond_ft) < F::cast(1e-10) {
        let (_, sv_f, _) = &fxl.svd(false, false).unwrap();
        let cond_fx = sv_f[0] / sv_f[sv_f.len() - 1];
        if F::cast(cond_fx) > F::cast(1e15) {
            return Err(GpError::LikelihoodComputationError(
                "F is too ill conditioned. Poor combination \
                of regression model and observations."
                    .to_string(),
            ));
        } else {
            // ft is too ill conditioned, get out (try a different theta)
            return Err(GpError::LikelihoodComputationError(
                "ft is too ill conditioned, try another theta".to_string(),
            ));
        }
    }
    let yt = r_chol.solve_triangular(&ytrain.data, UPLO::Lower)?;

    let beta = ft_qr_r.solve_triangular_into(ft_qr_q.t().dot(&yt), UPLO::Upper)?;
    let rho = yt - ft.dot(&beta);
    let rho_sqr = rho.mapv(|v| v * v).sum_axis(Axis(0));

    let gamma = r_chol.t().solve_triangular_into(rho, UPLO::Upper)?;
    // The determinant of R is equal to the squared product of
    // the diagonal elements of its Cholesky decomposition r_chol
    let n_obs: F = F::cast(x_distances.n_obs);

    let logdet = r_chol.diag().mapv(|v: F| v.log10()).sum() * F::cast(2.) / n_obs;

    // Reduced likelihood
    let sigma2 = rho_sqr / n_obs;
    let reduced_likelihood = -n_obs * (sigma2.sum().log10() + logdet);

    Ok((
        reduced_likelihood,
        GpInnerParams {
            sigma2: sigma2[0] * ytrain.std[0] * ytrain.std[0],
            beta,
            gamma,
            r_chol,
            ft,
            ft_qr_r,
        },
    ))
}

/// See non blas version
#[cfg(feature = "blas")]
fn reduced_likelihood<F: Float>(
    fx: &ArrayBase<impl Data<Elem = F>, Ix2>,
    rxx: ArrayBase<impl Data<Elem = F>, Ix2>,
    x_distances: &DiffMatrix<F>,
    ytrain: &NormalizedData<F>,
    nugget: F,
) -> Result<(F, GpInnerParams<F>)> {
    // Set up R
    let mut r_mx: Array2<F> = Array2::<F>::eye(x_distances.n_obs).mapv(|v| v + v * nugget);
    for (i, ij) in x_distances.d_indices.outer_iter().enumerate() {
        r_mx[[ij[0], ij[1]]] = rxx[[i, 0]];
        r_mx[[ij[1], ij[0]]] = rxx[[i, 0]];
    }

    let fxl = fx.to_owned().with_lapack();

    // R cholesky decomposition
    let r_chol = r_mx.with_lapack().cholesky(UPLO::Lower)?;

    // Solve generalized least squared problem
    let ft = r_chol.solve_triangular(UPLO::Lower, Diag::NonUnit, &fxl)?;
    let (ft_qr_q, ft_qr_r) = ft.qr().unwrap();

    // Check whether we have an ill-conditioned problem
    let (_, sv_qr_r, _) = ft_qr_r.svd(false, false).unwrap();
    let cond_ft = sv_qr_r[sv_qr_r.len() - 1] / sv_qr_r[0];
    if F::cast(cond_ft) < F::cast(1e-10) {
        let (_, sv_f, _) = &fxl.svd(false, false).unwrap();
        let cond_fx = sv_f[0] / sv_f[sv_f.len() - 1];
        if F::cast(cond_fx) > F::cast(1e15) {
            return Err(GpError::LikelihoodComputationError(
                "F is too ill conditioned. Poor combination \
                of regression model and observations."
                    .to_string(),
            ));
        } else {
            // ft is too ill conditioned, get out (try a different theta)
            return Err(GpError::LikelihoodComputationError(
                "ft is too ill conditioned, try another theta".to_string(),
            ));
        }
    }

    let yt = r_chol.solve_triangular(
        UPLO::Lower,
        Diag::NonUnit,
        &ytrain.data.to_owned().with_lapack(),
    )?;

    let beta = ft_qr_r.solve_triangular_into(UPLO::Upper, Diag::NonUnit, ft_qr_q.t().dot(&yt))?;

    let rho = yt - ft.dot(&beta);
    let rho_sqr = rho.mapv(|v| v * v).sum_axis(Axis(0));
    let rho_sqr = rho_sqr.without_lapack();

    let gamma = r_chol
        .t()
        .solve_triangular_into(UPLO::Upper, Diag::NonUnit, rho)?;

    // The determinant of R is equal to the squared product of
    // the diagonal elements of its Cholesky decomposition r_chol
    let n_obs: F = F::cast(x_distances.n_obs);

    let logdet = r_chol
        .to_owned()
        .without_lapack()
        .diag()
        .mapv(|v: F| v.log10())
        .sum()
        * F::cast(2.)
        / n_obs;

    // Reduced likelihood
    let sigma2: Array1<F> = rho_sqr / n_obs;
    let reduced_likelihood = -n_obs * (sigma2.sum().log10() + logdet);
    Ok((
        reduced_likelihood,
        GpInnerParams {
            sigma2: sigma2[0] * ytrain.std[0] * ytrain.std[0],
            beta: beta.without_lapack(),
            gamma: gamma.without_lapack(),
            r_chol: r_chol.without_lapack(),
            ft: ft.without_lapack(),
            ft_qr_r: ft_qr_r.without_lapack(),
        },
    ))
}

/// Sample the gaussian process for `n_traj` trajectories using either the
/// cholesky or the eigenvalues decomposition of the conditioned covariance matrix.
/// `cov_x` is the covariance matrix at the given x points, of shape (n, n).
/// The latter method is recommended, as the cholesky decomposition suffers from the
/// occurrence of ill-conditioned matrices when the number of x locations increases.
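///
/// Sketch of the scheme implemented below: with a factor `C` such that
/// `C . C^t ~ cov_x` (the Cholesky factor, or `W . sqrt(D)` from the
/// eigendecomposition `cov_x = W . D . W^t`), trajectories are drawn as
/// `mean_x + C . Z` where `Z ~ Normal(0, I)` has shape (n, n_traj).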
pub(crate) fn sample<F: Float>(
    x: &ArrayBase<impl Data<Elem = F>, Ix2>,
    mean_x: Array2<F>,
    cov_x: Array2<F>,
    n_traj: usize,
    method: GpSamplingMethod,
) -> Array2<F> {
    let n_eval = x.nrows();
    let c = match method {
        GpSamplingMethod::Cholesky => {
            #[cfg(not(feature = "blas"))]
            let c = cov_x.with_lapack().cholesky().unwrap();
            #[cfg(feature = "blas")]
            let c = cov_x.with_lapack().cholesky(UPLO::Lower).unwrap();
            c
        }
        GpSamplingMethod::EigenValues => {
            #[cfg(feature = "blas")]
            let (v, w) = cov_x.with_lapack().eigh(UPLO::Lower).unwrap();
            #[cfg(not(feature = "blas"))]
            let (v, w) = cov_x.with_lapack().eigh_into().unwrap();
            let v = v.mapv(F::cast);
            let v = v.mapv(|x| {
                // Zero out eigenvalues below the 1e-9 lower bound
                // (numerical noise) before taking the square root
                if x < F::cast(1e-9) {
                    return F::zero();
                }
                x.sqrt()
            });
            let d = Array2::from_diag(&v).with_lapack();
            #[cfg(feature = "blas")]
            let c = w.dot(&d);
            #[cfg(not(feature = "blas"))]
            let c = w.dot(&d);
            c
        }
    }
    .without_lapack();
    let normal = Normal::new(0., 1.).unwrap();
    let ary = Array::random((n_eval, n_traj), normal).mapv(|v| F::cast(v));
    mean_x.to_owned() + c.dot(&ary)
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::{assert_abs_diff_eq, assert_abs_diff_ne};
    use argmin_testfunctions::rosenbrock;
    use egobox_doe::{Lhs, LhsKind, SamplingMethod};
    use linfa::prelude::Predict;
    #[cfg(not(feature = "blas"))]
    use linfa_linalg::norm::Norm;
    use ndarray::{Array, Zip, arr1, arr2, array};
    #[cfg(feature = "blas")]
    use ndarray_linalg::Norm;
    use ndarray_npy::write_npy;
    use ndarray_rand::RandomExt;
    use ndarray_rand::rand::SeedableRng;
    use ndarray_rand::rand_distr::Uniform;
    use ndarray_stats::DeviationExt;
    use paste::paste;
    use rand_xoshiro::Xoshiro256Plus;

    #[test]
    fn test_constant_function() {
        let dim = 3;
        let lim = array![[0., 1.]];
        let xlimits = lim.broadcast((dim, 2)).unwrap();
        let rng = Xoshiro256Plus::seed_from_u64(42);
        let nt = 5;
        let xt = Lhs::new(&xlimits).with_rng(rng).sample(nt);
        let yt = Array::from_vec(vec![3.1; nt]);
        let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
            ConstantMean::default(),
            SquaredExponentialCorr::default(),
        )
        .theta_init(array![0.1])
        .kpls_dim(Some(1))
        .fit(&Dataset::new(xt, yt))
        .expect("GP fit error");
        let rng = Xoshiro256Plus::seed_from_u64(43);
        let xtest = Lhs::new(&xlimits).with_rng(rng).sample(nt);
        let ytest = gp.predict(&xtest).expect("prediction error");
        assert_abs_diff_eq!(Array::from_elem((nt,), 3.1), ytest, epsilon = 1e-6);
    }

    macro_rules! test_gp {
        ($regr:ident, $corr:ident) => {
            paste! {

                #[test]
                fn [<test_gp_ $regr:snake _ $corr:snake >]() {
                    let xt = array![[0.0], [1.0], [2.0], [3.0], [4.0]];
                    let xplot = Array::linspace(0., 4., 100).insert_axis(Axis(1));
                    let yt = array![0.0, 1.0, 1.5, 0.9, 1.0];
                    let gp = GaussianProcess::<f64, [<$regr Mean>], [<$corr Corr>] >::params(
                        [<$regr Mean>]::default(),
                        [<$corr Corr>]::default(),
                    )
                    .theta_init(array![0.1])
                    .fit(&Dataset::new(xt, yt))
                    .expect("GP fit error");
                    let yvals = gp
                        .predict(&arr2(&[[1.0], [3.5]]))
                        .expect("prediction error");
                    let expected_y = arr1(&[1.0, 0.9]);
                    assert_abs_diff_eq!(expected_y, yvals, epsilon = 0.5);

                    let gpr_vals = gp.predict(&xplot).unwrap();

                    let yvars = gp
                        .predict_var(&arr2(&[[1.0], [3.5]]))
                        .expect("prediction error");
                    let expected_vars = arr1(&[0., 0.1]);
                    assert_abs_diff_eq!(expected_vars, yvars, epsilon = 0.5);

                    let gpr_vars = gp.predict_var(&xplot).unwrap();

                    let test_dir = "target/tests";
                    std::fs::create_dir_all(test_dir).ok();

                    let xplot_file = stringify!([<gp_x_ $regr:snake _ $corr:snake >]);
                    let file_path = format!("{}/{}.npy", test_dir, xplot_file);
                    write_npy(file_path, &xplot).expect("x saved");

                    let gp_vals_file = stringify!([<gp_vals_ $regr:snake _ $corr:snake >]);
                    let file_path = format!("{}/{}.npy", test_dir, gp_vals_file);
                    write_npy(file_path, &gpr_vals).expect("gp vals saved");

                    let gp_vars_file = stringify!([<gp_vars_ $regr:snake _ $corr:snake >]);
                    let file_path = format!("{}/{}.npy", test_dir, gp_vars_file);
                    write_npy(file_path, &gpr_vars).expect("gp vars saved");
                }
            }
        };
    }

    test_gp!(Constant, SquaredExponential);
    test_gp!(Constant, AbsoluteExponential);
    test_gp!(Constant, Matern32);
    test_gp!(Constant, Matern52);

    test_gp!(Linear, SquaredExponential);
    test_gp!(Linear, AbsoluteExponential);
    test_gp!(Linear, Matern32);
    test_gp!(Linear, Matern52);

    test_gp!(Quadratic, SquaredExponential);
    test_gp!(Quadratic, AbsoluteExponential);
    test_gp!(Quadratic, Matern32);
    test_gp!(Quadratic, Matern52);

    fn griewank(x: &Array2<f64>) -> Array1<f64> {
        let dim = x.ncols();
        let d = Array1::linspace(1., dim as f64, dim).mapv(|v| v.sqrt());
        let mut y = Array1::zeros((x.nrows(),));
        Zip::from(&mut y).and(x.rows()).for_each(|y, x| {
            let s = x.mapv(|v| v * v).sum() / 4000.;
            let p = (x.to_owned() / &d)
                .mapv(|v| v.cos())
                .fold(1., |acc, x| acc * x);
            *y = s - p + 1.;
        });
        y
    }

    #[test]
    fn test_griewank() {
        let x = array![[1., 1., 1., 1., 1.], [2., 2., 2., 2., 2.]];
        assert_abs_diff_eq!(array![0.72890641, 1.01387135], griewank(&x), epsilon = 1e-8);
    }

1337    #[test]
1338    fn test_kpls_griewank() {
1339        let dims = [5]; // , 10, 60];
1340        let nts = [100]; // , 300, 500];
1341        let lim = array![[-600., 600.]];
1342
1343        let test_dir = "target/tests";
1344        std::fs::create_dir_all(test_dir).ok();
1345
1346        (0..dims.len()).for_each(|i| {
1347            let dim = dims[i];
1348            let nt = nts[i];
1349            let xlimits = lim.broadcast((dim, 2)).unwrap();
1350
1351            let prefix = "griewank";
1352            let xfilename = format!("{test_dir}/{prefix}_xt_{nt}x{dim}.npy");
1353            let yfilename = format!("{test_dir}/{prefix}_yt_{nt}x1.npy");
1354
1355            let rng = Xoshiro256Plus::seed_from_u64(42);
1356            let xt = Lhs::new(&xlimits).with_rng(rng).sample(nt);
1357            write_npy(xfilename, &xt).expect("cannot save xt");
1358            let yt = griewank(&xt);
1359            write_npy(yfilename, &yt).expect("cannot save yt");
1360
1361            let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
1362                ConstantMean::default(),
1363                SquaredExponentialCorr::default(),
1364            )
1365            .kpls_dim(Some(3))
1366            .fit(&Dataset::new(xt, yt))
1367            .expect("GP fit error");
1368
1369            let rng = Xoshiro256Plus::seed_from_u64(0);
1370            let xtest = Lhs::new(&xlimits).with_rng(rng).sample(100);
1371            //let xtest = Array2::ones((1, dim));
1372            let ytest = gp.predict(&xtest).expect("prediction error");
1373            let ytrue = griewank(&xtest);
1374
1375            let nrmse = (ytrue.to_owned() - &ytest).norm_l2() / ytrue.norm_l2();
1376            println!(
1377                "diff={}  ytrue={} nrsme={}",
1378                (ytrue.to_owned() - &ytest).norm_l2(),
1379                ytrue.norm_l2(),
1380                nrmse
1381            );
1382            assert_abs_diff_eq!(nrmse, 0., epsilon = 1e-2);
1383        });
1384    }
1385
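    // Tensor-product exponential: f(x) = Π_i exp(x_i) = exp(Σ_i x_i),
    // a smooth separable function, hence well captured by the single PLS
    // component used in the test below.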
1386    fn tensor_product_exp(x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Array1<f64> {
1387        x.mapv(|v| v.exp()).map_axis(Axis(1), |row| row.product())
1388    }
1389
1390    #[test]
1391    fn test_kpls_tp_exp() {
1392        let dim = 3;
1393        let nt = 300;
1394        let lim = array![[-1., 1.]];
1395        let xlimits = lim.broadcast((dim, 2)).unwrap();
1396        let rng = Xoshiro256Plus::seed_from_u64(42);
1397        let xt = Lhs::new(&xlimits).with_rng(rng).sample(nt);
1398        let yt = tensor_product_exp(&xt);
1399
1400        let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
1401            ConstantMean::default(),
1402            SquaredExponentialCorr::default(),
1403        )
1404        .kpls_dim(Some(1))
1405        .fit(&Dataset::new(xt, yt))
1406        .expect("GP training");
1407
1408        let xv = Lhs::new(&xlimits).sample(100);
1409        let yv = tensor_product_exp(&xv);
1410
1411        let ytest = gp.predict(&xv).unwrap();
1412        let err = ytest.l2_dist(&yv).unwrap() / yv.norm_l2();
1413        assert_abs_diff_eq!(err, 0., epsilon = 2e-2);
1414    }
1415
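    // Rosenbrock in its usual d-dimensional form (assumed definition of the
    // `rosenbrock` helper called below):
    //     f(x) = Σ_{i=1}^{d-1} [ 100 (x_{i+1} - x_i^2)^2 + (1 - x_i)^2 ]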
1416    fn rosenb(x: &ArrayBase<impl Data<Elem = f64>, Ix2>) -> Array1<f64> {
1417        let mut y: Array1<f64> = Array1::zeros((x.nrows(),));
1418        Zip::from(&mut y).and(x.rows()).par_for_each(|yi, xi| {
1419            *yi = rosenbrock(&xi.to_vec());
1420        });
1421        y
1422    }
1423
1424    #[test]
1425    fn test_kpls_rosenb() {
1426        let dim = 20;
1427        let nt = 30;
1428        let lim = array![[-1., 1.]];
1429        let xlimits = lim.broadcast((dim, 2)).unwrap();
1430        let rng = Xoshiro256Plus::seed_from_u64(42);
1431        let xt = Lhs::new(&xlimits).with_rng(rng).sample(nt);
1432        let yt = rosenb(&xt);
1433
1434        let gp = GaussianProcess::<f64, ConstantMean, Matern52Corr>::params(
1435            ConstantMean::default(),
1436            Matern52Corr::default(),
1437        )
1438        .kpls_dim(Some(1))
1439        .fit(&Dataset::new(xt.to_owned(), yt))
1440        .expect("GP training");
1441
1442        let rng2 = Xoshiro256Plus::seed_from_u64(41);
1443        let xv = Lhs::new(&xlimits).with_rng(rng2).sample(300);
1444        let yv = rosenb(&xv);
1445
1446        let ytest = gp.predict(&xv).expect("GP prediction");
1447        let err = ytest.l2_dist(&yv).unwrap() / yv.norm_l2();
1448        assert_abs_diff_eq!(err, 0., epsilon = 4e-1);
1449
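        // an interpolating GP should have near-zero predictive variance at its
        // own training points; the tolerance is loose since the 1-component
        // KPLS model only approximates the 20-dimensional kernel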
1450        let var = GpVariancePredictor(&gp).predict(&xt);
1451        assert_abs_diff_eq!(var, Array1::zeros(nt), epsilon = 2e-1);
1452    }
1453
1454    fn sphere(x: &Array2<f64>) -> Array1<f64> {
1455        (x * x).sum_axis(Axis(1))
1456    }
1457
1458    fn dsphere(x: &Array2<f64>) -> Array2<f64> {
1459        x.mapv(|v| 2. * v)
1460    }
1461
1462    fn norm1(x: &Array2<f64>) -> Array1<f64> {
1463        x.mapv(|v| v.abs()).sum_axis(Axis(1)).to_owned()
1464    }
1465
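    // gradient of the L1 norm: sign(x_i) component-wise, with the subgradient
    // at x_i = 0 arbitrarily taken as -1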
1466    fn dnorm1(x: &Array2<f64>) -> Array2<f64> {
1467        x.mapv(|v| if v > 0. { 1. } else { -1. })
1468    }
1469
1470    macro_rules! test_gp_derivatives {
1471        ($regr:ident, $corr:ident, $func:ident, $limit:expr_2021, $nt:expr_2021) => {
1472            paste! {
1473
1474                #[test]
1475                fn [<test_gp_derivatives_ $regr:snake _ $corr:snake>]() {
1476                    let mut rng = Xoshiro256Plus::seed_from_u64(42);
1477                    let xt = egobox_doe::Lhs::new(&array![[-$limit, $limit], [-$limit, $limit]])
1478                    .kind(egobox_doe::LhsKind::CenteredMaximin)
1479                    .with_rng(rng.clone())
1480                    .sample($nt);
1481
1482                    let yt = [<$func>](&xt);
1483                    let gp = GaussianProcess::<f64, [<$regr Mean>], [<$corr Corr>] >::params(
1484                        [<$regr Mean>]::default(),
1485                        [<$corr Corr>]::default(),
1486                    )
1487                    .fit(&Dataset::new(xt, yt))
1488                    .expect("GP fitting");
1489
1490                    let x = Array::random_using((2,), Uniform::new(-$limit, $limit), &mut rng);
1491                    //let x = array![3., 5.];
1492                    let xa: f64 = x[0];
1493                    let xb: f64 = x[1];
1494                    let e = 1e-5;
1495
1496                    let x = array![
1497                        [xa, xb],
1498                        [xa + e, xb],
1499                        [xa - e, xb],
1500                        [xa, xb + e],
1501                        [xa, xb - e]
1502                    ];
1503
1504                    let y_pred = gp.predict(&x).unwrap();
1505                    println!("value at [{},{}] = {}", xa, xb, y_pred);
1506                    let y_deriv = gp.predict_gradients(&x);
1507                    println!("deriv at [{},{}] = {}", xa, xb, y_deriv);
1508                    let true_deriv = [<d $func>](&array![[xa, xb]]);
1509                    println!("true deriv at [{},{}] = {}", xa, xb, true_deriv);
1510                    println!("jacob = at [{},{}] = {}", xa, xb, gp.predict_jacobian(&array![xa, xb]));
1511
1512                    let diff_g = (y_pred[1] - y_pred[2]) / (2. * e);
1513                    let diff_d = (y_pred[3] - y_pred[4]) / (2. * e);
1514
1515                    // only check against the finite-difference estimate when it is not wildly off the true derivative
1516                    if (diff_g-true_deriv[[0, 0]]).abs() < 10. {
1517                        assert_rel_or_abs_error(y_deriv[[0, 0]], diff_g);
1518                    }
1519                    if (diff_d-true_deriv[[0, 1]]).abs() < 10. {
1520                        assert_rel_or_abs_error(y_deriv[[0, 1]], diff_d);
1521                    }
1522                }
1523            }
1524        };
1525    }
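    // The macro above checks analytic gradients against a second-order central
    // difference, f'(x) ≈ (f(x + e) - f(x - e)) / (2e), whose truncation error
    // is O(e^2). The helper and test below are an illustrative addition, not
    // part of the original suite.
    fn central_diff(f: impl Fn(f64) -> f64, x: f64, e: f64) -> f64 {
        (f(x + e) - f(x - e)) / (2. * e)
    }

    #[test]
    fn test_central_diff_sketch() {
        // d/dx x^2 = 2x, so the estimate at x = 3 should be ~6 (exact for a
        // quadratic, up to floating-point rounding)
        assert_abs_diff_eq!(central_diff(|v| v * v, 3., 1e-5), 6., epsilon = 1e-8);
    }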
1526
1527    test_gp_derivatives!(Constant, SquaredExponential, sphere, 10., 10);
1528    test_gp_derivatives!(Linear, SquaredExponential, sphere, 10., 10);
1529    test_gp_derivatives!(Quadratic, SquaredExponential, sphere, 10., 10);
1530    test_gp_derivatives!(Constant, AbsoluteExponential, sphere, 10., 10);
1531    test_gp_derivatives!(Linear, AbsoluteExponential, norm1, 10., 16);
1532    test_gp_derivatives!(Quadratic, AbsoluteExponential, norm1, 10., 16);
1533    test_gp_derivatives!(Constant, Matern32, norm1, 10., 16);
1534    test_gp_derivatives!(Linear, Matern32, norm1, 10., 16);
1535    test_gp_derivatives!(Quadratic, Matern32, sphere, 10., 16);
1536    test_gp_derivatives!(Constant, Matern52, norm1, 10., 16);
1537    test_gp_derivatives!(Linear, Matern52, norm1, 10., 16);
1538    test_gp_derivatives!(Quadratic, Matern52, sphere, 10., 10);
1539
1540    #[allow(unused_macros)]
1541    macro_rules! test_gp_variance_derivatives {
1542        ($regr:ident, $corr:ident, $func:ident, $limit:expr_2021, $nt:expr_2021) => {
1543            paste! {
1544
1545                #[test]
1546                fn [<test_gp_variance_derivatives_ $regr:snake _ $corr:snake _ $func:snake>]() {
1547                    let mut rng = Xoshiro256Plus::seed_from_u64(42);
1548                    let xt = egobox_doe::Lhs::new(&array![[-$limit, $limit], [-$limit, $limit]]).with_rng(rng.clone()).sample($nt);
1549                    let yt = [<$func>](&xt);
1550                    println!("{}", stringify!($func));
1551
1552                    let gp = GaussianProcess::<f64, [<$regr Mean>], [<$corr Corr>] >::params(
1553                        [<$regr Mean>]::default(),
1554                        [<$corr Corr>]::default(),
1555                    )
1556                    .fit(&Dataset::new(xt, yt))
1557                    .expect("GP fitting");
1558
1559                    for _ in 0..10 {
1560                        let x = Array::random_using((2,), Uniform::new(-$limit, $limit), &mut rng);
1561                        let xa: f64 = x[0];
1562                        let xb: f64 = x[1];
1563                        let e = 1e-5;
1564
1565                        let x = array![
1566                            [xa, xb],
1567                            [xa + e, xb],
1568                            [xa - e, xb],
1569                            [xa, xb + e],
1570                            [xa, xb - e]
1571                        ];
1572                        println!("****************************************");
1573                        let y_pred = gp.predict(&x).unwrap();
1574                        println!("value at [{},{}] = {}", xa, xb, y_pred);
1575                        let y_deriv = gp.predict_gradients(&x);
1576                        println!("deriv at [{},{}] = {}", xa, xb, y_deriv);
1577                        let y_pred = gp.predict_var(&x).unwrap();
1578                        println!("variance at [{},{}] = {}", xa, xb, y_pred);
1579                        let y_deriv = gp.predict_var_gradients(&x);
1580                        println!("variance deriv at [{},{}] = {}", xa, xb, y_deriv);
1581
1582                        let diff_g = (y_pred[1] - y_pred[2]) / (2. * e);
1583                        let diff_d = (y_pred[3] - y_pred[4]) / (2. * e);
1584
1585                        assert_rel_or_abs_error(y_deriv[[0, 0]], diff_g);
1586                        assert_rel_or_abs_error(y_deriv[[0, 1]], diff_d);
1587                    }
1588                }
1589            }
1590        };
1591    }
1592
1593    test_gp_variance_derivatives!(Constant, SquaredExponential, sphere, 10., 100);
1594    test_gp_variance_derivatives!(Linear, SquaredExponential, sphere, 10., 100);
1595    test_gp_variance_derivatives!(Quadratic, SquaredExponential, sphere, 10., 100);
1596    // FIXME: exclude as it fails on testing-features CI: blas, nlopt...
1597    #[cfg(not(feature = "nlopt"))]
1598    test_gp_variance_derivatives!(Constant, AbsoluteExponential, norm1, 10., 100);
1599    test_gp_variance_derivatives!(Linear, AbsoluteExponential, norm1, 1., 50);
1600    test_gp_variance_derivatives!(Quadratic, AbsoluteExponential, sphere, 10., 100);
1601    test_gp_variance_derivatives!(Constant, Matern32, sphere, 10., 100);
1602    test_gp_variance_derivatives!(Linear, Matern32, norm1, 1., 50);
1603    test_gp_variance_derivatives!(Quadratic, Matern32, sphere, 10., 100);
1604    test_gp_variance_derivatives!(Constant, Matern52, sphere, 10., 100);
1605    test_gp_variance_derivatives!(Linear, Matern52, norm1, 1., 50);
1606    test_gp_variance_derivatives!(Quadratic, Matern52, sphere, 10., 100);
1607
1608    #[test]
1609    fn test_variance_derivatives() {
1610        let xt = egobox_doe::FullFactorial::new(&array![[-10., 10.], [-10., 10.]]).sample(10);
1611        let yt = sphere(&xt);
1612
1613        let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
1614            ConstantMean::default(),
1615            SquaredExponentialCorr::default(),
1616        )
1617        .fit(&Dataset::new(xt, yt))
1618        .expect("GP fitting");
1619
1620        for _ in 0..20 {
1621            let mut rng = Xoshiro256Plus::seed_from_u64(42);
1622            let x = Array::random_using((2,), Uniform::new(-10., 10.), &mut rng);
1623            let xa: f64 = x[0];
1624            let xb: f64 = x[1];
1625            let e = 1e-5;
1626
1627            let x = array![
1628                [xa, xb],
1629                [xa + e, xb],
1630                [xa - e, xb],
1631                [xa, xb + e],
1632                [xa, xb - e]
1633            ];
1634            let y_pred = gp.predict_var(&x).unwrap();
1635            println!("variance at [{xa},{xb}] = {y_pred}");
1636            let y_deriv = gp.predict_var_gradients(&x);
1637            println!("variance deriv at [{xa},{xb}] = {y_deriv}");
1638
1639            let diff_g = (y_pred[1] - y_pred[2]) / (2. * e);
1640            let diff_d = (y_pred[3] - y_pred[4]) / (2. * e);
1641
1642            if y_pred[0].abs() > 1e-1 && diff_g.abs() > 1e-1 {
1643                // do not test against fdiff when the variance or its derivative is too small
1644                assert_rel_or_abs_error(y_deriv[[0, 0]], diff_g);
1645            }
1646            if y_pred[0].abs() > 1e-1 && diff_d.abs() > 1e-1 {
1647                // do not test against fdiff when the variance or its derivative is too small
1648                assert_rel_or_abs_error(y_deriv[[0, 1]], diff_d);
1649            }
1650        }
1651    }
1652
1653    #[test]
1654    fn test_fixed_theta() {
1655        let xt = array![[0.0], [1.0], [2.0], [3.0], [4.0]];
1656        let yt = array![0.0, 1.0, 1.5, 0.9, 1.0];
1657        let gp = Kriging::params()
1658            .fit(&Dataset::new(xt.clone(), yt.clone()))
1659            .expect("GP fit error");
1660        let default = ThetaTuning::default();
1661        assert_abs_diff_ne!(*gp.theta(), default.init());
1662        let expected = gp.theta();
1663
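        // ThetaTuning::Fixed bypasses hyperparameter optimization and keeps
        // the supplied lengthscales as-is, so the refit below must reproduce
        // the previously optimized theta exactly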
1664        let gp = Kriging::params()
1665            .theta_tuning(ThetaTuning::Fixed(expected.clone()))
1666            .fit(&Dataset::new(xt, yt))
1667            .expect("GP fit error");
1668        assert_abs_diff_eq!(*gp.theta(), expected);
1669    }
1670
1671    fn x2sinx(x: &Array2<f64>) -> Array1<f64> {
1672        ((x * x) * (x).mapv(|v| v.sin())).remove_axis(Axis(1))
1673    }
1674
1675    #[test]
1676    fn test_sampling() {
1677        let xdoe = array![[-8.5], [-4.0], [-3.0], [-1.0], [4.0], [7.5]];
1678        let ydoe = x2sinx(&xdoe);
1679        let krg = Kriging::<f64>::params()
1680            .fit(&Dataset::new(xdoe, ydoe))
1681            .expect("Kriging training");
1682        let n_plot = 35;
1683        let n_traj = 10;
1684        let (x_min, x_max) = (-10., 10.);
1685        let x = Array::linspace(x_min, x_max, n_plot)
1686            .into_shape_with_order((n_plot, 1))
1687            .unwrap();
1688        let trajs = krg.sample(&x, n_traj);
1689        assert_eq!(&[n_plot, n_traj], trajs.shape())
1690    }
1691
1692    #[test]
1693    fn test_sampling_eigen() {
1694        let xdoe = array![[-8.5], [-4.0], [-3.0], [-1.0], [4.0], [7.5]];
1695        let ydoe = x2sinx(&xdoe);
1696        let krg = Kriging::<f64>::params()
1697            .fit(&Dataset::new(xdoe, ydoe))
1698            .expect("Kriging training");
1699        let n_plot = 500;
1700        let n_traj = 10;
1701        let (x_min, x_max) = (-10., 10.);
1702        let x = Array::linspace(x_min, x_max, n_plot)
1703            .into_shape_with_order((n_plot, 1))
1704            .unwrap();
1705        let trajs = krg.sample_eig(&x, n_traj);
1706        assert_eq!(&[n_plot, n_traj], trajs.shape());
1707        assert!(!trajs.fold(false, |acc, v| acc || v.is_nan())); // check no nans
1708    }
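    // Both samplers draw trajectories y = mean + A z with z ~ N(0, I), where
    // A Aᵀ equals the posterior covariance: `sample` via a Cholesky factor and
    // `sample_eig` via an eigendecomposition U diag(sqrt(max(λ, 0))) Uᵀ, which
    // stays well defined when the covariance is nearly singular on dense grids
    // (hence the NaN check above). Note added for context; the exact
    // factorizations are as implemented by the respective methods.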
1709
1710    fn assert_rel_or_abs_error(y_deriv: f64, fdiff: f64) {
1711        println!("analytic deriv = {y_deriv}, fdiff = {fdiff}");
1712        if fdiff.abs() < 1. {
1713            let atol = 1.;
1714            println!("Check absolute error: abs({y_deriv}) should be < {atol}");
1715            assert_abs_diff_eq!(y_deriv, 0.0, epsilon = atol); // check absolute when close to zero
1716        } else {
1717            let rtol = 6e-1;
1718            let rel_error = (y_deriv - fdiff).abs() / fdiff.abs(); // check relative
1719            println!("Check relative error: {rel_error} should be < {rtol}");
1720            assert_abs_diff_eq!(rel_error, 0.0, epsilon = rtol);
1721        }
1722    }
1723
1724    fn sin_linear(x: &Array2<f64>) -> Array2<f64> {
1725        // sin + linear trend
1726        let x1 = x.column(0).to_owned().mapv(|v| v.sin());
1727        let x2 = x.column(0).mapv(|v| 2. * v) + x.column(1).mapv(|v| 5. * v);
1728        (x1 + x2)
1729            .mapv(|v| v + 10.)
1730            .into_shape_with_order((x.nrows(), 1))
1731            .unwrap()
1732    }
1733
1734    #[test]
1735    fn test_bug_var_derivatives() {
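        // the hardcoded xt/yt below appear to have been generated from this
        // (otherwise unused) centered LHS design; kept to document provenance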
1736        let _xt = egobox_doe::Lhs::new(&array![[-5., 10.], [-5., 10.]])
1737            .kind(LhsKind::Centered)
1738            .sample(12);
1739        let _yt = sin_linear(&_xt);
1740
1741        let xt = array![
1742            [6.875, -4.375],
1743            [-3.125, 1.875],
1744            [1.875, -1.875],
1745            [-4.375, 3.125],
1746            [8.125, 9.375],
1747            [4.375, 4.375],
1748            [0.625, 0.625],
1749            [9.375, 6.875],
1750            [5.625, 8.125],
1751            [-0.625, -3.125],
1752            [3.125, 5.625],
1753            [-1.875, -0.625]
1754        ];
1755        let yt = array![
1756            2.43286801,
1757            13.10840811,
1758            5.32908578,
1759            17.81862219,
1760            74.08849877,
1761            39.68137781,
1762            14.96009727,
1763            63.17475741,
1764            61.26331775,
1765            -7.46009727,
1766            44.39159189,
1767            2.17091422,
1768        ];
1769
1770        let gp = GaussianProcess::<f64, ConstantMean, SquaredExponentialCorr>::params(
1771            ConstantMean::default(),
1772            SquaredExponentialCorr::default(),
1773        )
1774        .theta_tuning(ThetaTuning::Fixed(array![
1775            f64::sqrt(2. * 0.0437386),
1776            f64::sqrt(2. * 0.00697978)
1777        ]))
1778        .fit(&Dataset::new(xt, yt))
1779        .expect("GP fitting");
1780
1781        let e = 5e-6;
1782        let xa = -1.3;
1783        let xb = 2.5;
1784        let x = array![
1785            [xa, xb],
1786            [xa + e, xb],
1787            [xa - e, xb],
1788            [xa, xb + e],
1789            [xa, xb - e]
1790        ];
1791        let y_pred = gp.predict_var(&x).unwrap();
1792        let y_deriv = gp.predict_var_gradients(&array![[xa, xb]]);
1793        let diff_g = (y_pred[1] - y_pred[2]) / (2. * e);
1794        let diff_d = (y_pred[3] - y_pred[4]) / (2. * e);
1795
1796        assert_abs_diff_eq!(y_deriv[[0, 0]], diff_g, epsilon = 1e-5);
1797        assert_abs_diff_eq!(y_deriv[[0, 1]], diff_d, epsilon = 1e-5);
1798    }
1799}