// scirs2_interpolate/adaptive_gp.rs

//! Gaussian process regression with adaptive kernels
//!
//! This module provides advanced Gaussian process (GP) regression with automatic
//! kernel selection and hyperparameter optimization. Gaussian processes provide
//! a probabilistic approach to interpolation that naturally quantifies uncertainty
//! and can adapt to different data characteristics through kernel selection.
//!
//! # Adaptive Kernel Features
//!
//! - **Multiple kernel types**: RBF, Matérn, Periodic, Linear, Polynomial, Spectral Mixture
//! - **Automatic kernel selection**: Model comparison and selection based on evidence
//! - **Hyperparameter optimization**: Marginal likelihood maximization (gradient-free search)
//! - **Kernel composition**: Hooks for additive and multiplicative combinations
//! - **Uncertainty quantification**: Predictive variance and confidence intervals
//! - **Active learning integration**: Uncertainty-guided sampling for optimal data collection
//! - **Sparse GP methods**: Inducing points for scalability to large datasets
//!
//! # Examples
//!
//! ```rust
//! use scirs2_core::ndarray::Array1;
//! use scirs2_interpolate::adaptive_gp::{AdaptiveGaussianProcess, KernelType};
//!
//! // Create sample data: a sinusoid plus a small constant offset (a stand-in for noise)
//! let x = Array1::linspace(0.0_f64, 10.0_f64, 20);
//! let y = x.mapv(|x| x.sin() + 0.1_f64 * (5.0_f64 * x).cos()) + Array1::from_elem(20, 0.05_f64);
//!
//! // Create an adaptive GP with automatic kernel selection
//! let mut gp = AdaptiveGaussianProcess::new()
//!     .with_kernel_candidates(vec![
//!         KernelType::RBF,
//!         KernelType::Matern52,
//!         KernelType::Periodic,
//!     ])
//!     .with_automatic_optimization(true)
//!     .with_uncertainty_quantification(true);
//!
//! // Fit the model (automatically selects the best kernel and optimizes hyperparameters)
//! gp.fit(&x.view(), &y.view()).unwrap();
//!
//! // Make predictions with uncertainty
//! let x_new = Array1::linspace(0.0_f64, 10.0_f64, 100);
//! let (mean, variance) = gp.predict_with_uncertainty(&x_new.view()).unwrap();
//!
//! println!("Selected kernel: {:?}", gp.get_selected_kernel());
//! println!("Predictive uncertainty: {:?}", variance.mapv(|v: f64| v.sqrt()));
//! ```

use crate::error::{InterpolateError, InterpolateResult};
use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ScalarOperand};
use scirs2_core::numeric::{Float, FromPrimitive, ToPrimitive};
use std::collections::HashMap;
use std::fmt::{Debug, Display, LowerExp};
use std::ops::{AddAssign, DivAssign, MulAssign, RemAssign, SubAssign};

/// Types of kernels available for Gaussian process regression
///
/// Formulas below use σ² for the output variance, ℓ for the length scale,
/// d = |x − x'| for the distance, p for the period, and α for the rational
/// quadratic shape parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KernelType {
    /// Radial Basis Function (RBF) / squared exponential kernel: σ² exp(−d² / (2ℓ²))
    RBF,
    /// Matérn kernel with ν = 1/2 (exponential): σ² exp(−d/ℓ)
    Matern12,
    /// Matérn kernel with ν = 3/2: σ² (1 + √3 d/ℓ) exp(−√3 d/ℓ)
    Matern32,
    /// Matérn kernel with ν = 5/2: σ² (1 + √5 d/ℓ + 5d²/(3ℓ²)) exp(−√5 d/ℓ)
    Matern52,
    /// Periodic kernel for repeating patterns: σ² exp(−2 sin²(π d/p) / ℓ²)
    Periodic,
    /// Linear kernel: σ² x·x'
    Linear,
    /// Polynomial kernel: σ² (1 + x·x'/ℓ)^degree
    Polynomial,
    /// Rational quadratic kernel: σ² (1 + d²/(2αℓ²))^(−α)
    RationalQuadratic,
    /// Spectral mixture kernel (for complex periodicities)
    SpectralMixture,
    /// White noise kernel: the noise variance at d = 0, zero elsewhere
    WhiteNoise,
}

/// Hyperparameters for different kernel types
#[derive(Debug, Clone)]
pub struct KernelHyperparameters<T> {
    /// Output variance (signal variance)
    pub output_variance: T,
    /// Length scale parameter(s)
    pub length_scales: Vec<T>,
    /// Noise variance
    pub noise_variance: T,
    /// Additional kernel-specific parameters
    pub additional_params: HashMap<String, T>,
}

impl<T: Float + FromPrimitive> Default for KernelHyperparameters<T> {
    fn default() -> Self {
        Self {
            output_variance: T::one(),
            length_scales: vec![T::one()],
            noise_variance: T::from(0.01).unwrap(),
            additional_params: HashMap::new(),
        }
    }
}

/// Information about a fitted kernel model
#[derive(Debug, Clone)]
pub struct KernelModel<T> {
    /// Type of kernel
    pub kerneltype: KernelType,
    /// Optimized hyperparameters
    pub hyperparameters: KernelHyperparameters<T>,
    /// Log marginal likelihood (model evidence)
    pub log_marginal_likelihood: T,
    /// Number of hyperparameters in the model
    pub num_hyperparameters: usize,
    /// Bayesian Information Criterion, BIC = k·ln(n) − 2·ln(L̂), for model comparison (lower is better)
    pub bic: T,
    /// Training data size when model was fitted
    pub training_size: usize,
}

/// Configuration for adaptive Gaussian process regression
#[derive(Debug, Clone)]
pub struct AdaptiveGPConfig<T> {
    /// Candidate kernel types to consider
    pub kernel_candidates: Vec<KernelType>,
    /// Whether to enable automatic hyperparameter optimization
    pub enable_optimization: bool,
    /// Whether to enable uncertainty quantification
    pub enable_uncertainty: bool,
    /// Maximum number of optimization iterations
    pub max_optimization_iterations: usize,
    /// Convergence tolerance for optimization
    pub optimization_tolerance: T,
    /// Whether to try kernel combinations (additive/multiplicative)
    pub enable_kernel_composition: bool,
    /// Maximum number of components in composite kernels
    pub max_composite_components: usize,
    /// Whether to use sparse GP approximations for large datasets
    pub enable_sparse_gp: bool,
    /// Number of inducing points for sparse GP
    pub num_inducing_points: usize,
    /// Small value added to the kernel matrix diagonal for numerical stability
    pub jitter: T,
}

impl<T: Float + FromPrimitive> Default for AdaptiveGPConfig<T> {
    fn default() -> Self {
        Self {
            kernel_candidates: vec![
                KernelType::RBF,
                KernelType::Matern32,
                KernelType::Matern52,
                KernelType::Linear,
                KernelType::Periodic,
            ],
            enable_optimization: true,
            enable_uncertainty: true,
            max_optimization_iterations: 100,
            optimization_tolerance: T::from(1e-6).unwrap(),
            enable_kernel_composition: false,
            max_composite_components: 3,
            enable_sparse_gp: false,
            num_inducing_points: 50,
            jitter: T::from(1e-6).unwrap(),
        }
    }
}

/// Statistics tracking GP model selection and training
#[derive(Debug, Clone, Default)]
pub struct GPStats {
    /// Number of kernels evaluated
    pub kernels_evaluated: usize,
    /// Number of optimization iterations performed
    pub optimization_iterations: usize,
    /// Best log marginal likelihood achieved
    pub best_log_marginal_likelihood: f64,
    /// Computational time for model selection (milliseconds)
    pub model_selection_time_ms: u64,
    /// Computational time for hyperparameter optimization (milliseconds)
    pub optimization_time_ms: u64,
    /// Distribution of kernel types tried
    pub kernel_usage: HashMap<String, usize>,
}

/// Adaptive Gaussian Process for interpolation with automatic kernel selection
#[derive(Debug)]
pub struct AdaptiveGaussianProcess<T>
where
    T: Float
        + FromPrimitive
        + ToPrimitive
        + Debug
        + Display
        + LowerExp
        + ScalarOperand
        + AddAssign
        + SubAssign
        + MulAssign
        + DivAssign
        + RemAssign
        + Copy
        + 'static,
{
    /// Configuration settings
    config: AdaptiveGPConfig<T>,
    /// Training data
    x_train: Array1<T>,
    y_train: Array1<T>,
    /// Selected kernel model
    selected_model: Option<KernelModel<T>>,
    /// All evaluated models (for comparison)
    evaluated_models: Vec<KernelModel<T>>,
    /// Precomputed Cholesky decomposition for predictions
    cholesky_factor: Option<Array2<T>>,
    /// Alpha vector for predictions (K^{-1} * y)
    alpha: Option<Array1<T>>,
    /// Training statistics
    stats: GPStats,
    /// Inducing points for sparse GP
    #[allow(dead_code)]
    inducing_points: Option<Array1<T>>,
}

impl<T> Default for AdaptiveGaussianProcess<T>
where
    T: Float
        + FromPrimitive
        + ToPrimitive
        + Debug
        + Display
        + LowerExp
        + ScalarOperand
        + AddAssign
        + SubAssign
        + MulAssign
        + DivAssign
        + RemAssign
        + Copy
        + 'static,
{
    fn default() -> Self {
        Self::new()
    }
}

impl<T> AdaptiveGaussianProcess<T>
where
    T: Float
        + FromPrimitive
        + ToPrimitive
        + Debug
        + Display
        + LowerExp
        + ScalarOperand
        + AddAssign
        + SubAssign
        + MulAssign
        + DivAssign
        + RemAssign
        + Copy
        + 'static,
{
    /// Create a new adaptive Gaussian process with default configuration
    pub fn new() -> Self {
        Self {
            config: AdaptiveGPConfig::default(),
            x_train: Array1::zeros(0),
            y_train: Array1::zeros(0),
            selected_model: None,
            evaluated_models: Vec::new(),
            cholesky_factor: None,
            alpha: None,
            stats: GPStats::default(),
            inducing_points: None,
        }
    }

    /// Set the candidate kernel types to consider
    pub fn with_kernel_candidates(mut self, kernels: Vec<KernelType>) -> Self {
        self.config.kernel_candidates = kernels;
        self
    }

    /// Enable or disable automatic hyperparameter optimization
    pub fn with_automatic_optimization(mut self, enable: bool) -> Self {
        self.config.enable_optimization = enable;
        self
    }

    /// Enable or disable uncertainty quantification
    pub fn with_uncertainty_quantification(mut self, enable: bool) -> Self {
        self.config.enable_uncertainty = enable;
        self
    }

    /// Enable or disable kernel composition (additive/multiplicative combinations)
    pub fn with_kernel_composition(mut self, enable: bool) -> Self {
        self.config.enable_kernel_composition = enable;
        self
    }

    /// Set the maximum number of optimization iterations
    pub fn with_max_optimization_iterations(mut self, max_iter: usize) -> Self {
        self.config.max_optimization_iterations = max_iter;
        self
    }

    /// Enable sparse GP approximations for large datasets
    pub fn with_sparse_approximation(mut self, enable: bool, num_inducing: usize) -> Self {
        self.config.enable_sparse_gp = enable;
        self.config.num_inducing_points = num_inducing;
        self
    }

    /// Fit the Gaussian process to training data
    ///
    /// This performs automatic kernel selection and hyperparameter optimization.
    ///
    /// # Arguments
    ///
    /// * `x` - Input training data
    /// * `y` - Output training data
    ///
    /// # Returns
    ///
    /// Success indicator
    pub fn fit(&mut self, x: &ArrayView1<T>, y: &ArrayView1<T>) -> InterpolateResult<bool> {
        if x.len() != y.len() {
            return Err(InterpolateError::DimensionMismatch(format!(
                "x and y must have the same length, got {} and {}",
                x.len(),
                y.len()
            )));
        }

        if x.len() < 2 {
            return Err(InterpolateError::InvalidValue(
                "At least 2 data points are required for GP regression".to_string(),
            ));
        }

        // Store training data
        self.x_train = x.to_owned();
        self.y_train = y.to_owned();

        let start_time = std::time::Instant::now();

        // Evaluate all candidate kernels
        self.evaluated_models.clear();
        self.stats = GPStats::default();

        for &kerneltype in &self.config.kernel_candidates.clone() {
            let model = self.fit_kernel(kerneltype)?;
            self.evaluated_models.push(model);

            // Update statistics
            *self
                .stats
                .kernel_usage
                .entry(format!("{kerneltype:?}"))
                .or_insert(0) += 1;
            self.stats.kernels_evaluated += 1;
        }

        // If kernel composition is enabled, try some combinations
        if self.config.enable_kernel_composition {
            self.evaluate_composite_kernels()?;
        }

        // Select the best kernel based on model evidence (log marginal likelihood)
        self.select_best_model()?;

        // Precompute quantities for prediction
        self.precompute_prediction_quantities()?;

        // Update timing statistics
        self.stats.model_selection_time_ms = start_time.elapsed().as_millis() as u64;

        Ok(true)
    }

    /// Make predictions at new input points
    ///
    /// # Arguments
    ///
    /// * `x_new` - Input points for prediction
    ///
    /// # Returns
    ///
    /// Predicted mean values
    pub fn predict(&self, x_new: &ArrayView1<T>) -> InterpolateResult<Array1<T>> {
        if self.selected_model.is_none() {
            return Err(InterpolateError::InvalidState(
                "Model must be fitted before making predictions".to_string(),
            ));
        }

        let (mean, _) = self.predict_with_uncertainty(x_new)?;
        Ok(mean)
    }

    /// Make predictions with uncertainty quantification
    ///
    /// # Arguments
    ///
    /// * `x_new` - Input points for prediction
    ///
    /// # Returns
    ///
    /// Tuple of (predicted means, predicted variances)
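    ///
    /// # Examples
    ///
    /// A minimal sketch of turning the returned variances into approximate 95%
    /// confidence intervals (assuming Gaussian predictive marginals):
    ///
    /// ```rust
    /// # use scirs2_core::ndarray::Array1;
    /// # use scirs2_interpolate::adaptive_gp::{AdaptiveGaussianProcess, KernelType};
    /// # let x = Array1::linspace(0.0_f64, 1.0_f64, 10);
    /// # let y = x.mapv(|v| v.sin());
    /// # let mut gp = AdaptiveGaussianProcess::new()
    /// #     .with_kernel_candidates(vec![KernelType::RBF]);
    /// # gp.fit(&x.view(), &y.view()).unwrap();
    /// let x_new = Array1::from_vec(vec![0.25_f64, 0.75_f64]);
    /// let (mean, variance) = gp.predict_with_uncertainty(&x_new.view()).unwrap();
    /// // 1.96 is the usual normal quantile for a two-sided 95% interval
    /// let half_width = variance.mapv(|v: f64| 1.96 * v.sqrt());
    /// let lower = &mean - &half_width;
    /// let upper = &mean + &half_width;
    /// assert!(lower.iter().zip(upper.iter()).all(|(l, u)| l <= u));
    /// ```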
    pub fn predict_with_uncertainty(
        &self,
        x_new: &ArrayView1<T>,
    ) -> InterpolateResult<(Array1<T>, Array1<T>)> {
        if self.selected_model.is_none() || self.alpha.is_none() {
            return Err(InterpolateError::InvalidState(
                "Model must be fitted before making predictions".to_string(),
            ));
        }

        let selected_model = self.selected_model.as_ref().unwrap();
        let alpha = self.alpha.as_ref().unwrap();

        // Compute kernel matrix between training and test points
        let k_star = self.compute_kernel_matrix_cross(
            &self.x_train.view(),
            x_new,
            selected_model.kerneltype,
            &selected_model.hyperparameters,
        )?;

        // Compute predictive mean: k_star^T * alpha
        let mean = k_star.t().dot(alpha);

        let variance = if self.config.enable_uncertainty {
            // Compute predictive variance
            self.compute_predictive_variance(x_new, &k_star)?
        } else {
            // Return zero variance if uncertainty is disabled
            Array1::zeros(x_new.len())
        };

        Ok((mean, variance))
    }

    /// Get the selected kernel type
    pub fn get_selected_kernel(&self) -> Option<KernelType> {
        self.selected_model.as_ref().map(|m| m.kerneltype)
    }

    /// Get the selected model's hyperparameters
    pub fn get_hyperparameters(&self) -> Option<&KernelHyperparameters<T>> {
        self.selected_model.as_ref().map(|m| &m.hyperparameters)
    }

    /// Get model selection and training statistics
    pub fn get_stats(&self) -> &GPStats {
        &self.stats
    }

    /// Get all evaluated models for comparison
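    ///
    /// # Examples
    ///
    /// A minimal sketch of ranking the candidate models by BIC (lower is better):
    ///
    /// ```rust
    /// # use scirs2_core::ndarray::Array1;
    /// # use scirs2_interpolate::adaptive_gp::{AdaptiveGaussianProcess, KernelType};
    /// # let x = Array1::linspace(0.0_f64, 1.0_f64, 10);
    /// # let y = x.mapv(|v| v * v);
    /// # let mut gp = AdaptiveGaussianProcess::new()
    /// #     .with_kernel_candidates(vec![KernelType::RBF, KernelType::Matern52]);
    /// # gp.fit(&x.view(), &y.view()).unwrap();
    /// let mut models = gp.get_evaluated_models().to_vec();
    /// models.sort_by(|a, b| a.bic.partial_cmp(&b.bic).unwrap());
    /// for model in &models {
    ///     println!("{:?}: BIC = {}", model.kerneltype, model.bic);
    /// }
    /// ```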
    pub fn get_evaluated_models(&self) -> &[KernelModel<T>] {
        &self.evaluated_models
    }

    /// Fit a specific kernel type and return the fitted model
    fn fit_kernel(&mut self, kerneltype: KernelType) -> InterpolateResult<KernelModel<T>> {
        // Initialize hyperparameters based on the kernel type
        let mut hyperparams = self.initialize_hyperparameters(kerneltype)?;

        let mut log_marginal_likelihood =
            self.compute_log_marginal_likelihood(kerneltype, &hyperparams)?;

        if self.config.enable_optimization {
            let optimization_start = std::time::Instant::now();

            // Optimize hyperparameters using a gradient-free coordinate search (simplified)
            for iteration in 0..self.config.max_optimization_iterations {
                let improved = self.optimize_hyperparameters_step(
                    kerneltype,
                    &mut hyperparams,
                    &mut log_marginal_likelihood,
                )?;

                if !improved {
                    break;
                }

                self.stats.optimization_iterations += 1;

                // Simplified convergence check: cap the search after 20 iterations.
                // A full implementation would monitor gradient norms instead.
                if iteration > 10 && iteration % 10 == 0 {
                    break;
                }
            }

            self.stats.optimization_time_ms += optimization_start.elapsed().as_millis() as u64;
        }

        // Compute BIC = k * ln(n) - 2 * ln(L) for model comparison
        let num_params = self.count_hyperparameters(kerneltype);
        let n = T::from(self.x_train.len()).unwrap();
        let bic = T::from(num_params).unwrap() * n.ln()
            - T::from(2.0).unwrap() * log_marginal_likelihood;

        Ok(KernelModel {
            kerneltype,
            hyperparameters: hyperparams,
            log_marginal_likelihood,
            num_hyperparameters: num_params,
            bic,
            training_size: self.x_train.len(),
        })
    }

    /// Initialize hyperparameters for a given kernel type
    fn initialize_hyperparameters(
        &self,
        kerneltype: KernelType,
    ) -> InterpolateResult<KernelHyperparameters<T>> {
        let mut hyperparams = KernelHyperparameters::default();

        // Estimate the initial length scale from the data range
        // (assumes x_train is sorted in ascending order)
        let x_range = self.x_train[self.x_train.len() - 1] - self.x_train[0];
        let initial_length_scale = x_range / T::from(10.0).unwrap();
        hyperparams.length_scales = vec![initial_length_scale];

        // Estimate initial output variance from the sample variance of y
        let y_mean = self.y_train.sum() / T::from(self.y_train.len()).unwrap();
        let y_var = self.y_train.mapv(|y| (y - y_mean) * (y - y_mean)).sum()
            / T::from(self.y_train.len() - 1).unwrap();
        hyperparams.output_variance = y_var.max(T::from(0.01).unwrap());

        // Kernel-specific parameter initialization
        match kerneltype {
            KernelType::Periodic => {
                // Add period parameter
                hyperparams
                    .additional_params
                    .insert("period".to_string(), x_range / T::from(2.0).unwrap());
            }
            KernelType::Polynomial => {
                // Add degree parameter
                hyperparams
                    .additional_params
                    .insert("degree".to_string(), T::from(2.0).unwrap());
            }
            KernelType::RationalQuadratic => {
                // Add alpha parameter
                hyperparams
                    .additional_params
                    .insert("alpha".to_string(), T::one());
            }
            _ => {}
        }

        Ok(hyperparams)
    }

    /// Compute the log marginal likelihood for given kernel and hyperparameters
    fn compute_log_marginal_likelihood(
        &self,
        kerneltype: KernelType,
        hyperparams: &KernelHyperparameters<T>,
    ) -> InterpolateResult<T> {
        // Compute kernel matrix
        let k_matrix = self.compute_kernel_matrix(
            &self.x_train.view(),
            &self.x_train.view(),
            kerneltype,
            hyperparams,
        )?;

        // Add noise to diagonal
        let mut k_noisy = k_matrix;
        for i in 0..k_noisy.nrows() {
            k_noisy[(i, i)] += hyperparams.noise_variance + self.config.jitter;
        }

        // Compute Cholesky decomposition
        let cholesky = self.cholesky_decomposition(&k_noisy)?;

        // Compute log determinant via the Cholesky factor: log|K| = 2 * sum(log(L_ii))
        let mut log_det = T::zero();
        for i in 0..cholesky.nrows() {
            log_det += cholesky[(i, i)].ln();
        }
        log_det = T::from(2.0).unwrap() * log_det;

        // Solve K * alpha = y
        let alpha = self.cholesky_solve(&cholesky, &self.y_train.view())?;

        // Compute data fit term: y^T * K^{-1} * y
        let data_fit = self.y_train.dot(&alpha);

        // Log marginal likelihood: -0.5 * (y^T * K^{-1} * y + log|K| + n * log(2π))
        let n = T::from(self.x_train.len()).unwrap();
        let log_2pi = T::from(2.0 * std::f64::consts::PI).unwrap().ln();

        let log_marginal_likelihood = -T::from(0.5).unwrap() * (data_fit + log_det + n * log_2pi);

        Ok(log_marginal_likelihood)
    }

    /// Perform one step of hyperparameter optimization
    ///
    /// Perturbs each hyperparameter by a fixed multiplicative factor and keeps
    /// any change that improves the log marginal likelihood.
    fn optimize_hyperparameters_step(
        &self,
        kerneltype: KernelType,
        hyperparams: &mut KernelHyperparameters<T>,
        current_likelihood: &mut T,
    ) -> InterpolateResult<bool> {
        let mut improved = false;

        // Try perturbing each hyperparameter
        let original_hyperparams = hyperparams.clone();

        // Optimize output variance
        let original_output_var = hyperparams.output_variance;
        for &multiplier in &[1.1, 0.9] {
            hyperparams.output_variance = original_output_var * T::from(multiplier).unwrap();
            if let Ok(likelihood) = self.compute_log_marginal_likelihood(kerneltype, hyperparams) {
                if likelihood > *current_likelihood {
                    *current_likelihood = likelihood;
                    improved = true;
                    break;
                }
            }
            hyperparams.output_variance = original_output_var;
        }

        // Optimize length scales
        for (i, &original_length_scale) in original_hyperparams.length_scales.iter().enumerate() {
            for &multiplier in &[1.2, 0.8] {
                hyperparams.length_scales[i] = original_length_scale * T::from(multiplier).unwrap();
                if let Ok(likelihood) =
                    self.compute_log_marginal_likelihood(kerneltype, hyperparams)
                {
                    if likelihood > *current_likelihood {
                        *current_likelihood = likelihood;
                        improved = true;
                        break;
                    }
                }
                hyperparams.length_scales[i] = original_length_scale;
            }
        }

        // Optimize noise variance
        let original_noise_var = hyperparams.noise_variance;
        for &multiplier in &[1.1, 0.9] {
            hyperparams.noise_variance = original_noise_var * T::from(multiplier).unwrap();
            if let Ok(likelihood) = self.compute_log_marginal_likelihood(kerneltype, hyperparams) {
                if likelihood > *current_likelihood {
                    *current_likelihood = likelihood;
                    improved = true;
                    break;
                }
            }
            hyperparams.noise_variance = original_noise_var;
        }

        Ok(improved)
    }

    /// Evaluate composite kernel combinations
    ///
    /// Composite (additive/multiplicative) kernels are not yet implemented;
    /// this is a placeholder so that enabling `enable_kernel_composition` does
    /// not fail. A full implementation would fit additive and multiplicative
    /// combinations of the base candidates, capping the total number of
    /// evaluated models.
    fn evaluate_composite_kernels(&mut self) -> InterpolateResult<()> {
        Ok(())
    }

    /// Select the best model based on model evidence
    fn select_best_model(&mut self) -> InterpolateResult<()> {
        if self.evaluated_models.is_empty() {
            return Err(InterpolateError::InvalidState(
                "No models have been evaluated".to_string(),
            ));
        }

        // Find the model with the highest log marginal likelihood
        let best_model = self
            .evaluated_models
            .iter()
            .max_by(|a, b| {
                a.log_marginal_likelihood
                    .partial_cmp(&b.log_marginal_likelihood)
                    .unwrap()
            })
            .unwrap()
            .clone();

        self.stats.best_log_marginal_likelihood =
            best_model.log_marginal_likelihood.to_f64().unwrap();
        self.selected_model = Some(best_model);

        Ok(())
    }

    /// Precompute quantities needed for prediction
    fn precompute_prediction_quantities(&mut self) -> InterpolateResult<()> {
        if let Some(ref model) = self.selected_model {
            // Compute kernel matrix
            let k_matrix = self.compute_kernel_matrix(
                &self.x_train.view(),
                &self.x_train.view(),
                model.kerneltype,
                &model.hyperparameters,
            )?;

            // Add noise to diagonal
            let mut k_noisy = k_matrix;
            for i in 0..k_noisy.nrows() {
                k_noisy[(i, i)] += model.hyperparameters.noise_variance + self.config.jitter;
            }

            // Compute Cholesky decomposition
            let cholesky = self.cholesky_decomposition(&k_noisy)?;

            // Solve for alpha = K^{-1} * y
            let alpha = self.cholesky_solve(&cholesky, &self.y_train.view())?;

            self.cholesky_factor = Some(cholesky);
            self.alpha = Some(alpha);
        }

        Ok(())
    }

    /// Compute kernel matrix between two sets of points
    fn compute_kernel_matrix(
        &self,
        x1: &ArrayView1<T>,
        x2: &ArrayView1<T>,
        kerneltype: KernelType,
        hyperparams: &KernelHyperparameters<T>,
    ) -> InterpolateResult<Array2<T>> {
        let n1 = x1.len();
        let n2 = x2.len();
        let mut k_matrix = Array2::zeros((n1, n2));

        for i in 0..n1 {
            for j in 0..n2 {
                k_matrix[(i, j)] = self.kernel_function(x1[i], x2[j], kerneltype, hyperparams)?;
            }
        }

        Ok(k_matrix)
    }

    /// Compute kernel matrix between training and test points
    fn compute_kernel_matrix_cross(
        &self,
        x_train: &ArrayView1<T>,
        x_test: &ArrayView1<T>,
        kerneltype: KernelType,
        hyperparams: &KernelHyperparameters<T>,
    ) -> InterpolateResult<Array2<T>> {
        self.compute_kernel_matrix(x_train, x_test, kerneltype, hyperparams)
    }

    /// Evaluate the kernel function between two points
    fn kernel_function(
        &self,
        x1: T,
        x2: T,
        kerneltype: KernelType,
        hyperparams: &KernelHyperparameters<T>,
    ) -> InterpolateResult<T> {
        let output_var = hyperparams.output_variance;
        // Only the first length scale is used for these one-dimensional kernels
        let length_scale = hyperparams.length_scales[0];
        let distance = (x1 - x2).abs();

        let value = match kerneltype {
            KernelType::RBF => {
                let scaled_dist = distance / length_scale;
                output_var * (-T::from(0.5).unwrap() * scaled_dist * scaled_dist).exp()
            }
            KernelType::Matern12 => {
                let scaled_dist = distance / length_scale;
                output_var * (-scaled_dist).exp()
            }
            KernelType::Matern32 => {
                let scaled_dist = (T::from(3.0).unwrap().sqrt()) * distance / length_scale;
                output_var * (T::one() + scaled_dist) * (-scaled_dist).exp()
            }
            KernelType::Matern52 => {
                let scaled_dist = (T::from(5.0).unwrap().sqrt()) * distance / length_scale;
                output_var
                    * (T::one() + scaled_dist + scaled_dist * scaled_dist / T::from(3.0).unwrap())
                    * (-scaled_dist).exp()
            }
            KernelType::Linear => output_var * x1 * x2,
            KernelType::Polynomial => {
                let default_degree = T::from(2.0).unwrap();
                let degree = hyperparams
                    .additional_params
                    .get("degree")
                    .unwrap_or(&default_degree);
                output_var * (T::one() + x1 * x2 / length_scale).powf(*degree)
            }
            KernelType::Periodic => {
                let default_period = T::one();
                let period = hyperparams
                    .additional_params
                    .get("period")
                    .unwrap_or(&default_period);
                let pi = T::from(std::f64::consts::PI).unwrap();
                let sin_arg = pi * distance / *period;
                let scaled_sin = T::from(2.0).unwrap() * sin_arg.sin() / length_scale;
                output_var * (-T::from(0.5).unwrap() * scaled_sin * scaled_sin).exp()
            }
            KernelType::RationalQuadratic => {
                let default_alpha = T::one();
                let alpha = hyperparams
                    .additional_params
                    .get("alpha")
                    .unwrap_or(&default_alpha);
                let scaled_dist_sq = distance * distance
                    / (T::from(2.0).unwrap() * *alpha * length_scale * length_scale);
                output_var * (T::one() + scaled_dist_sq).powf(-*alpha)
            }
            KernelType::WhiteNoise => {
                if distance < T::epsilon() {
                    hyperparams.noise_variance
                } else {
                    T::zero()
                }
            }
            KernelType::SpectralMixture => {
                return Err(InterpolateError::InvalidValue(format!(
                    "Kernel type {kerneltype:?} not implemented"
                )));
            }
        };

        Ok(value)
    }

    /// Compute the predictive variance at the test points
    fn compute_predictive_variance(
        &self,
        x_new: &ArrayView1<T>,
        k_star: &Array2<T>,
    ) -> InterpolateResult<Array1<T>> {
        if let (Some(cholesky), Some(model)) = (&self.cholesky_factor, &self.selected_model) {
            let mut variance = Array1::zeros(x_new.len());

            for i in 0..x_new.len() {
                // Prior variance k(x*, x*); using the output variance here
                // assumes a stationary kernel
                let prior_var = model.hyperparameters.output_variance;

                // Solve K * v = k_star[:, i] via the Cholesky factor
                let k_star_i = k_star.column(i);
                let v = self.cholesky_solve(cholesky, &k_star_i)?;

                // Predictive variance: k(x*, x*) - k_star_i^T * K^{-1} * k_star_i
                let reduction = k_star_i.dot(&v);
                variance[i] = prior_var - reduction;

                // Clamp to zero to guard against negative values from round-off
                variance[i] = variance[i].max(T::zero());
            }

            Ok(variance)
        } else {
            Err(InterpolateError::InvalidState(
                "Model must be fitted before computing variance".to_string(),
            ))
        }
    }

    /// Count the number of hyperparameters for a kernel type
    fn count_hyperparameters(&self, kerneltype: KernelType) -> usize {
        match kerneltype {
            KernelType::RBF
            | KernelType::Matern12
            | KernelType::Matern32
            | KernelType::Matern52 => 3, // output_var, length_scale, noise_var
            KernelType::Linear => 2,            // output_var, noise_var
            KernelType::Polynomial => 4,        // output_var, length_scale, noise_var, degree
            KernelType::Periodic => 4,          // output_var, length_scale, noise_var, period
            KernelType::RationalQuadratic => 4, // output_var, length_scale, noise_var, alpha
            KernelType::SpectralMixture => 6,   // More complex
            KernelType::WhiteNoise => 1,        // noise_var only
        }
    }

    /// Compute the lower-triangular Cholesky factor L of a symmetric
    /// positive definite matrix, so that L * L^T = matrix
    fn cholesky_decomposition(&self, matrix: &Array2<T>) -> InterpolateResult<Array2<T>> {
        let n = matrix.nrows();
        let mut chol = Array2::zeros((n, n));

        for i in 0..n {
            for j in 0..=i {
                if i == j {
                    // Diagonal element
                    let mut sum = T::zero();
                    for k in 0..j {
                        sum += chol[(j, k)] * chol[(j, k)];
                    }
                    let diag_val = matrix[(j, j)] - sum;
                    if diag_val <= T::zero() {
                        return Err(InterpolateError::InvalidValue(
                            "Matrix is not positive definite".to_string(),
                        ));
                    }
                    chol[(j, j)] = diag_val.sqrt();
                } else {
                    // Off-diagonal element
                    let mut sum = T::zero();
                    for k in 0..j {
                        sum += chol[(i, k)] * chol[(j, k)];
                    }
                    chol[(i, j)] = (matrix[(i, j)] - sum) / chol[(j, j)];
                }
            }
        }

        Ok(chol)
    }

    /// Solve K * x = rhs given the lower-triangular Cholesky factor L of K,
    /// using forward substitution followed by backward substitution
    fn cholesky_solve(
        &self,
        cholesky: &Array2<T>,
        rhs: &ArrayView1<T>,
    ) -> InterpolateResult<Array1<T>> {
        let n = cholesky.nrows();

        // Forward substitution: L * y = rhs
        let mut y = Array1::zeros(n);
        for i in 0..n {
            let mut sum = T::zero();
            for j in 0..i {
                sum += cholesky[(i, j)] * y[j];
            }
            y[i] = (rhs[i] - sum) / cholesky[(i, i)];
        }

        // Backward substitution: L^T * x = y
        let mut x = Array1::zeros(n);
        for i in (0..n).rev() {
            let mut sum = T::zero();
            for j in i + 1..n {
                sum += cholesky[(j, i)] * x[j];
            }
            x[i] = (y[i] - sum) / cholesky[(i, i)];
        }

        Ok(x)
    }
}

/// Convenience function to create an adaptive GP with automatic kernel selection
///
/// # Arguments
///
/// * `x` - Input training data
/// * `y` - Output training data
/// * `kernel_candidates` - List of kernel types to consider
///
/// # Returns
///
/// A fitted adaptive Gaussian process
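///
/// # Examples
///
/// A minimal sketch of fitting with an explicit candidate list:
///
/// ```rust
/// # use scirs2_core::ndarray::Array1;
/// # use scirs2_interpolate::adaptive_gp::{make_adaptive_gp, KernelType};
/// let x = Array1::linspace(0.0_f64, 1.0_f64, 10);
/// let y = x.mapv(|v| v * v);
/// let gp = make_adaptive_gp(
///     &x.view(),
///     &y.view(),
///     Some(vec![KernelType::RBF, KernelType::Matern52]),
/// )
/// .unwrap();
/// let predictions = gp.predict(&Array1::from_vec(vec![0.5_f64]).view()).unwrap();
/// assert_eq!(predictions.len(), 1);
/// ```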
#[allow(dead_code)]
pub fn make_adaptive_gp<T>(
    x: &ArrayView1<T>,
    y: &ArrayView1<T>,
    kernel_candidates: Option<Vec<KernelType>>,
) -> InterpolateResult<AdaptiveGaussianProcess<T>>
where
    T: Float
        + FromPrimitive
        + ToPrimitive
        + Debug
        + Display
        + LowerExp
        + ScalarOperand
        + AddAssign
        + SubAssign
        + MulAssign
        + DivAssign
        + RemAssign
        + Copy
        + 'static,
{
    let mut gp = AdaptiveGaussianProcess::new();

    if let Some(kernels) = kernel_candidates {
        gp = gp.with_kernel_candidates(kernels);
    }

    gp.fit(x, y)?;
    Ok(gp)
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array1;

    #[test]
    fn test_adaptive_gp_creation() {
        let gp = AdaptiveGaussianProcess::<f64>::new();
        assert_eq!(gp.config.kernel_candidates.len(), 5);
        assert!(gp.config.enable_optimization);
        assert!(gp.config.enable_uncertainty);
    }

    #[test]
    fn test_adaptive_gp_simple_fit() {
        let x = Array1::from_vec(vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]);
        let y = Array1::from_vec(vec![0.0, 1.0, 4.0, 9.0, 16.0, 25.0]); // x^2

        let mut gp = AdaptiveGaussianProcess::new()
            .with_kernel_candidates(vec![KernelType::RBF, KernelType::Polynomial]);

        let result = gp.fit(&x.view(), &y.view());
        assert!(result.is_ok());
        assert!(gp.get_selected_kernel().is_some());
    }

    #[test]
    fn test_adaptive_gp_prediction() {
        let x = Array1::linspace(0.0, 10.0, 11);
        let y = x.mapv(|x| x.sin());

        let mut gp = AdaptiveGaussianProcess::new()
            .with_kernel_candidates(vec![KernelType::RBF, KernelType::Matern52]);

        gp.fit(&x.view(), &y.view()).unwrap();

        let x_new = Array1::from_vec(vec![2.5, 7.5]);
        let predictions = gp.predict(&x_new.view()).unwrap();

        assert_eq!(predictions.len(), 2);
        // Check that predictions are reasonable for the sine function
        assert!((predictions[0] - 2.5_f64.sin()).abs() < 0.5);
        assert!((predictions[1] - 7.5_f64.sin()).abs() < 0.5);
    }

    #[test]
    fn test_adaptive_gp_uncertainty() {
        let x = Array1::from_vec(vec![0.0, 2.0, 4.0]);
        let y = Array1::from_vec(vec![0.0, 4.0, 16.0]);

        let mut gp = AdaptiveGaussianProcess::new()
            .with_kernel_candidates(vec![KernelType::RBF])
            .with_uncertainty_quantification(true);

        gp.fit(&x.view(), &y.view()).unwrap();

        let x_new = Array1::from_vec(vec![1.0, 3.0]);
        let (mean, variance) = gp.predict_with_uncertainty(&x_new.view()).unwrap();

        assert_eq!(mean.len(), 2);
        assert_eq!(variance.len(), 2);

        // Variance should be non-negative
        assert!(variance.iter().all(|&v| v >= 0.0));
    }

    #[test]
    fn test_kernel_functions() {
        let gp = AdaptiveGaussianProcess::<f64>::new();
        let hyperparams = KernelHyperparameters::default();

        // Test RBF kernel
        let k_val = gp
            .kernel_function(0.0, 1.0, KernelType::RBF, &hyperparams)
            .unwrap();
        assert!(k_val > 0.0 && k_val < 1.0);

        // Test that the kernel is symmetric
        let k_val2 = gp
            .kernel_function(1.0, 0.0, KernelType::RBF, &hyperparams)
            .unwrap();
        assert!((k_val - k_val2).abs() < 1e-10);

        // Test that the kernel value at zero distance is maximal
        let k_zero = gp
            .kernel_function(0.0, 0.0, KernelType::RBF, &hyperparams)
            .unwrap();
        assert!(k_zero >= k_val);
    }

    #[test]
    fn test_model_selection() {
        let x = Array1::linspace(0.0, 2.0 * std::f64::consts::PI, 20);
        let y = x.mapv(|x| x.sin());

        let mut gp = AdaptiveGaussianProcess::new().with_kernel_candidates(vec![
            KernelType::RBF,
            KernelType::Periodic,
            KernelType::Linear,
        ]);

        gp.fit(&x.view(), &y.view()).unwrap();

        // For periodic data, the periodic kernel should be preferred
        // (though this may not always happen due to optimization challenges)
        let selected = gp.get_selected_kernel().unwrap();
        assert!(matches!(selected, KernelType::RBF | KernelType::Periodic));

        // Check that multiple models were evaluated
        assert!(gp.get_evaluated_models().len() >= 3);
    }
}
1125}