sklears_semi_supervised/deep_learning/
deep_gaussian_processes.rs

//! Deep Gaussian Processes for semi-supervised learning
//!
//! This module implements Deep Gaussian Processes (DGPs), which are hierarchical
//! compositions of Gaussian processes that can model complex non-linear relationships
//! and provide uncertainty quantification for semi-supervised learning tasks.
6
7use scirs2_core::ndarray_ext::{Array1, Array2, ArrayView1, ArrayView2, Axis};
8use scirs2_core::random::Random;
9// use scirs2_core::random::rand::seq::SliceRandom;
10use sklears_core::error::{Result, SklearsError};
11use sklears_core::traits::{Fit, Predict, PredictProba};
12use thiserror::Error;
13
/// Errors raised while configuring, training, or querying a deep Gaussian process.
#[derive(Error, Debug)]
pub enum DeepGaussianProcessError {
    /// Fewer than two layer dimensions were supplied (need at least input + output).
    #[error("Invalid number of layers: {0}")]
    InvalidNumLayers(usize),
    /// The requested number of inducing points is zero.
    #[error("Invalid inducing points: {0}")]
    InvalidInducingPoints(usize),
    /// A kernel parameter (e.g. noise variance) was non-positive.
    #[error("Invalid kernel parameter: {0}")]
    InvalidKernelParameter(f64),
    /// The learning rate was non-positive.
    #[error("Invalid learning rate: {0}")]
    InvalidLearningRate(f64),
    /// The epoch count was zero.
    #[error("Invalid epochs: {0}")]
    InvalidEpochs(usize),
    /// The training set contained no labeled samples (labels >= 0).
    #[error("Insufficient labeled samples: need at least 1")]
    InsufficientLabeledSamples,
    /// Input/target dimensions disagree with the configured model shape.
    #[error("Shape mismatch: expected {expected:?}, got {actual:?}")]
    ShapeMismatch {
        expected: Vec<usize>,
        actual: Vec<usize>,
    },
    /// Training aborted for a reason described in the message.
    #[error("Training failed: {0}")]
    TrainingFailed(String),
    /// Prediction was requested before `fit` completed.
    #[error("Model not trained")]
    ModelNotTrained,
    /// A linear-algebra routine (e.g. Cholesky) failed.
    #[error("Matrix operation failed: {0}")]
    MatrixOperationFailed(String),
}
40
41impl From<DeepGaussianProcessError> for SklearsError {
42    fn from(err: DeepGaussianProcessError) -> Self {
43        SklearsError::FitError(err.to_string())
44    }
45}
46
/// Kernel (covariance) functions available to the Gaussian process layers.
#[derive(Debug, Clone)]
pub enum KernelType {
    /// Squared-exponential (radial basis function) kernel.
    RBF { length_scale: f64, variance: f64 },
    /// Matern kernel with smoothness parameter nu = 3/2.
    Matern32 { length_scale: f64, variance: f64 },
    /// Matern kernel with smoothness parameter nu = 5/2.
    Matern52 { length_scale: f64, variance: f64 },
    /// Linear (dot-product) kernel with a constant offset.
    Linear { variance: f64, offset: f64 },
}
59
60impl KernelType {
61    fn compute(&self, x1: &ArrayView1<f64>, x2: &ArrayView1<f64>) -> f64 {
62        match self {
63            KernelType::RBF {
64                length_scale,
65                variance,
66            } => {
67                let squared_distance = x1
68                    .iter()
69                    .zip(x2.iter())
70                    .map(|(a, b)| (a - b).powi(2))
71                    .sum::<f64>();
72                variance * (-0.5 * squared_distance / length_scale.powi(2)).exp()
73            }
74            KernelType::Matern32 {
75                length_scale,
76                variance,
77            } => {
78                let distance = x1
79                    .iter()
80                    .zip(x2.iter())
81                    .map(|(a, b)| (a - b).powi(2))
82                    .sum::<f64>()
83                    .sqrt();
84                let scaled_distance = (3.0_f64.sqrt() * distance) / length_scale;
85                variance * (1.0 + scaled_distance) * (-scaled_distance).exp()
86            }
87            KernelType::Matern52 {
88                length_scale,
89                variance,
90            } => {
91                let distance = x1
92                    .iter()
93                    .zip(x2.iter())
94                    .map(|(a, b)| (a - b).powi(2))
95                    .sum::<f64>()
96                    .sqrt();
97                let scaled_distance = (5.0_f64.sqrt() * distance) / length_scale;
98                variance
99                    * (1.0
100                        + scaled_distance
101                        + (5.0 * distance.powi(2)) / (3.0 * length_scale.powi(2)))
102                    * (-scaled_distance).exp()
103            }
104            KernelType::Linear { variance, offset } => {
105                let dot_product = x1.iter().zip(x2.iter()).map(|(a, b)| a * b).sum::<f64>();
106                variance * (dot_product + offset)
107            }
108        }
109    }
110
111    fn compute_matrix(&self, X1: &ArrayView2<f64>, X2: &ArrayView2<f64>) -> Array2<f64> {
112        let (n1, _) = X1.dim();
113        let (n2, _) = X2.dim();
114        let mut K = Array2::zeros((n1, n2));
115
116        for i in 0..n1 {
117            for j in 0..n2 {
118                K[[i, j]] = self.compute(&X1.row(i), &X2.row(j));
119            }
120        }
121
122        K
123    }
124}
125
/// Single Gaussian Process layer in the deep architecture
#[derive(Debug, Clone)]
pub struct GaussianProcessLayer {
    /// Number of input features this layer accepts.
    pub input_dim: usize,
    /// Number of output dimensions this layer produces.
    pub output_dim: usize,
    /// Number of inducing (pseudo-input) points used for sparse inference.
    pub num_inducing: usize,
    /// Covariance kernel used for all kernel-matrix computations.
    pub kernel: KernelType,
    /// Observation noise variance added to the kernel diagonal during fitting.
    pub noise_variance: f64,
    /// Step size for the (simplified) variational updates.
    pub learning_rate: f64,
    // Inducing point locations, shape (num_inducing, input_dim); populated by `fit`.
    inducing_points: Array2<f64>,
    // Per-output-dimension mean estimate learned by `fit`.
    mean_function: Array1<f64>,
    // Set to true once `fit` completes successfully.
    is_trained: bool,
}
145
146impl GaussianProcessLayer {
147    pub fn new(input_dim: usize, output_dim: usize, num_inducing: usize) -> Self {
148        Self {
149            input_dim,
150            output_dim,
151            num_inducing,
152            kernel: KernelType::RBF {
153                length_scale: 1.0,
154                variance: 1.0,
155            },
156            noise_variance: 0.1,
157            learning_rate: 0.01,
158            inducing_points: Array2::zeros((num_inducing, input_dim)),
159            mean_function: Array1::zeros(output_dim),
160            is_trained: false,
161        }
162    }
163
164    pub fn kernel(mut self, kernel: KernelType) -> Self {
165        self.kernel = kernel;
166        self
167    }
168
169    pub fn noise_variance(mut self, noise_variance: f64) -> Result<Self> {
170        if noise_variance <= 0.0 {
171            return Err(DeepGaussianProcessError::InvalidKernelParameter(noise_variance).into());
172        }
173        self.noise_variance = noise_variance;
174        Ok(self)
175    }
176
177    pub fn learning_rate(mut self, learning_rate: f64) -> Result<Self> {
178        if learning_rate <= 0.0 {
179            return Err(DeepGaussianProcessError::InvalidLearningRate(learning_rate).into());
180        }
181        self.learning_rate = learning_rate;
182        Ok(self)
183    }
184
185    fn initialize_inducing_points(&mut self, X: &ArrayView2<f64>, random_state: Option<u64>) {
186        let (n_samples, _) = X.dim();
187        let mut rng = match random_state {
188            Some(seed) => Random::seed(seed),
189            None => Random::seed(42),
190        };
191
192        // Select random subset of training points as inducing points
193        let num_selected = self.num_inducing.min(n_samples);
194        let mut selected_indices = Vec::new();
195        for _ in 0..num_selected {
196            let idx = rng.gen_range(0..n_samples);
197            if !selected_indices.contains(&idx) {
198                selected_indices.push(idx);
199            }
200        }
201
202        for (i, &idx) in selected_indices.iter().enumerate() {
203            self.inducing_points.row_mut(i).assign(&X.row(idx));
204        }
205    }
206
207    fn add_jitter(&self, matrix: &mut Array2<f64>, jitter: f64) {
208        for i in 0..matrix.nrows() {
209            matrix[[i, i]] += jitter;
210        }
211    }
212
213    fn safe_cholesky(&self, matrix: &Array2<f64>) -> Result<Array2<f64>> {
214        let mut A = matrix.clone();
215        let jitter = 1e-6;
216        self.add_jitter(&mut A, jitter);
217
218        // Simple Cholesky decomposition (simplified implementation)
219        let n = A.nrows();
220        let mut L: Array2<f64> = Array2::zeros((n, n));
221
222        for i in 0..n {
223            for j in 0..=i {
224                if i == j {
225                    let sum: f64 = (0..j).map(|k| L[[i, k]].powi(2)).sum();
226                    let val = A[[i, i]] - sum;
227                    if val <= 0.0 {
228                        return Err(DeepGaussianProcessError::MatrixOperationFailed(
229                            "Matrix is not positive definite".to_string(),
230                        )
231                        .into());
232                    }
233                    L[[i, j]] = val.sqrt();
234                } else {
235                    let sum: f64 = (0..j).map(|k| L[[i, k]] * L[[j, k]]).sum();
236                    L[[i, j]] = (A[[i, j]] - sum) / L[[j, j]];
237                }
238            }
239        }
240
241        Ok(L)
242    }
243
244    fn solve_triangular_lower(&self, L: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
245        let n = L.nrows();
246        let mut x = Array1::zeros(n);
247
248        for i in 0..n {
249            let sum: f64 = (0..i).map(|j| L[[i, j]] * x[j]).sum();
250            x[i] = (b[i] - sum) / L[[i, i]];
251        }
252
253        x
254    }
255
256    fn solve_triangular_upper(&self, U: &Array2<f64>, b: &Array1<f64>) -> Array1<f64> {
257        let n = U.nrows();
258        let mut x = Array1::zeros(n);
259
260        for i in (0..n).rev() {
261            let sum: f64 = ((i + 1)..n).map(|j| U[[i, j]] * x[j]).sum();
262            x[i] = (b[i] - sum) / U[[i, i]];
263        }
264
265        x
266    }
267
268    #[allow(non_snake_case)]
269    pub fn fit(
270        &mut self,
271        X: &ArrayView2<f64>,
272        y: &ArrayView2<f64>,
273        random_state: Option<u64>,
274    ) -> Result<()> {
275        let (n_samples, n_features) = X.dim();
276        let (n_targets, output_dim) = y.dim();
277
278        if n_features != self.input_dim {
279            return Err(DeepGaussianProcessError::ShapeMismatch {
280                expected: vec![n_samples, self.input_dim],
281                actual: vec![n_samples, n_features],
282            }
283            .into());
284        }
285
286        if n_samples != n_targets || output_dim != self.output_dim {
287            return Err(DeepGaussianProcessError::ShapeMismatch {
288                expected: vec![n_samples, self.output_dim],
289                actual: vec![n_targets, output_dim],
290            }
291            .into());
292        }
293
294        self.initialize_inducing_points(X, random_state);
295
296        // Compute kernel matrices
297        let K_uu = self
298            .kernel
299            .compute_matrix(&self.inducing_points.view(), &self.inducing_points.view());
300        let K_uf = self.kernel.compute_matrix(&self.inducing_points.view(), X);
301
302        // Add noise to diagonal for numerical stability
303        let mut K_uu_noisy = K_uu.clone();
304        self.add_jitter(&mut K_uu_noisy, self.noise_variance);
305
306        // Simplified variational inference (ELBO optimization)
307        let L_uu = self.safe_cholesky(&K_uu_noisy)?;
308
309        // For each output dimension, fit a separate GP
310        for d in 0..self.output_dim {
311            let y_d = y.column(d);
312
313            // Solve for alpha: K_uu * alpha = K_uf * y
314            let K_uf_y = K_uf.dot(&y_d);
315            let alpha = self.solve_triangular_lower(&L_uu, &K_uf_y);
316            let L_uu_t = L_uu.t().to_owned();
317            let solution = self.solve_triangular_upper(&L_uu_t, &alpha);
318
319            // Store mean function for this output dimension
320            self.mean_function[d] = solution.mean().unwrap_or(0.0);
321        }
322
323        self.is_trained = true;
324        Ok(())
325    }
326
327    #[allow(non_snake_case)]
328    pub fn predict(&self, X: &ArrayView2<f64>) -> Result<Array2<f64>> {
329        if !self.is_trained {
330            return Err(DeepGaussianProcessError::ModelNotTrained.into());
331        }
332
333        let (n_test, n_features) = X.dim();
334        if n_features != self.input_dim {
335            return Err(DeepGaussianProcessError::ShapeMismatch {
336                expected: vec![n_test, self.input_dim],
337                actual: vec![n_test, n_features],
338            }
339            .into());
340        }
341
342        // Compute predictive mean using kernel computations
343        let K_su = self.kernel.compute_matrix(X, &self.inducing_points.view());
344        let K_uu = self
345            .kernel
346            .compute_matrix(&self.inducing_points.view(), &self.inducing_points.view());
347
348        let mut predictions = Array2::zeros((n_test, self.output_dim));
349
350        // Simple prediction using kernel mean
351        for i in 0..n_test {
352            for d in 0..self.output_dim {
353                // Simplified prediction: weighted average based on kernel similarities
354                let weights: f64 = K_su.row(i).sum();
355                if weights > 0.0 {
356                    predictions[[i, d]] =
357                        self.mean_function[d] * (weights / self.num_inducing as f64);
358                } else {
359                    predictions[[i, d]] = self.mean_function[d];
360                }
361            }
362        }
363
364        Ok(predictions)
365    }
366
367    pub fn predict_with_uncertainty(
368        &self,
369        X: &ArrayView2<f64>,
370    ) -> Result<(Array2<f64>, Array2<f64>)> {
371        let predictions = self.predict(X)?;
372        let (n_test, _) = X.dim();
373
374        // Simplified uncertainty estimation
375        let mut uncertainties = Array2::zeros((n_test, self.output_dim));
376        for i in 0..n_test {
377            for d in 0..self.output_dim {
378                uncertainties[[i, d]] = self.noise_variance.sqrt();
379            }
380        }
381
382        Ok((predictions, uncertainties))
383    }
384}
385
/// Deep Gaussian Process for semi-supervised learning
///
/// This implements a multi-layer composition of Gaussian processes that can
/// learn complex non-linear mappings while providing principled uncertainty
/// quantification for semi-supervised learning scenarios.
#[derive(Debug, Clone)]
pub struct DeepGaussianProcess {
    /// Width of each layer; the first entry is forced to the input dimension
    /// and the last to the number of classes during `fit`.
    pub layer_dims: Vec<usize>,
    /// Number of inducing points used by every layer.
    pub num_inducing: usize,
    /// Number of training epochs.
    /// NOTE(review): not currently read by `fit` — confirm intended use.
    pub epochs: usize,
    /// Learning rate forwarded to each layer.
    pub learning_rate: f64,
    /// Observation noise variance forwarded to each layer.
    pub noise_variance: f64,
    /// Optional RNG seed for reproducible inducing-point selection.
    pub random_state: Option<u64>,
    // One GP layer per consecutive pair in `layer_dims`; built during `fit`.
    layers: Vec<GaussianProcessLayer>,
    // Number of distinct classes found among the labeled targets.
    n_classes: usize,
    // Set to true once `fit` completes successfully.
    is_trained: bool,
}
409
410impl Default for DeepGaussianProcess {
411    fn default() -> Self {
412        Self {
413            layer_dims: vec![10, 5, 2],
414            num_inducing: 20,
415            epochs: 50,
416            learning_rate: 0.01,
417            noise_variance: 0.1,
418            random_state: None,
419            layers: Vec::new(),
420            n_classes: 0,
421            is_trained: false,
422        }
423    }
424}
425
impl DeepGaussianProcess {
    /// Create a model with the default configuration (see [`Default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Builder: set the layer widths (needs at least 2 entries: input + output).
    pub fn layer_dims(mut self, layer_dims: Vec<usize>) -> Result<Self> {
        if layer_dims.len() < 2 {
            return Err(DeepGaussianProcessError::InvalidNumLayers(layer_dims.len()).into());
        }
        self.layer_dims = layer_dims;
        Ok(self)
    }

    /// Builder: set the number of inducing points per layer (must be > 0).
    pub fn num_inducing(mut self, num_inducing: usize) -> Result<Self> {
        if num_inducing == 0 {
            return Err(DeepGaussianProcessError::InvalidInducingPoints(num_inducing).into());
        }
        self.num_inducing = num_inducing;
        Ok(self)
    }

    /// Builder: set the number of training epochs (must be > 0).
    pub fn epochs(mut self, epochs: usize) -> Result<Self> {
        if epochs == 0 {
            return Err(DeepGaussianProcessError::InvalidEpochs(epochs).into());
        }
        self.epochs = epochs;
        Ok(self)
    }

    /// Builder: set the learning rate (must be > 0).
    pub fn learning_rate(mut self, learning_rate: f64) -> Result<Self> {
        if learning_rate <= 0.0 {
            return Err(DeepGaussianProcessError::InvalidLearningRate(learning_rate).into());
        }
        self.learning_rate = learning_rate;
        Ok(self)
    }

    /// Builder: set the observation noise variance (must be > 0).
    pub fn noise_variance(mut self, noise_variance: f64) -> Result<Self> {
        if noise_variance <= 0.0 {
            return Err(DeepGaussianProcessError::InvalidKernelParameter(noise_variance).into());
        }
        self.noise_variance = noise_variance;
        Ok(self)
    }

    /// Builder: fix the RNG seed for reproducible inducing-point selection.
    pub fn random_state(mut self, random_state: u64) -> Self {
        self.random_state = Some(random_state);
        self
    }

    /// (Re)build one GP layer per consecutive pair in `layer_dims`.
    fn initialize_layers(&mut self) {
        self.layers.clear();

        for i in 0..self.layer_dims.len() - 1 {
            // The unwraps are safe: learning_rate and noise_variance are
            // positive by default and validated by the builder methods above.
            let layer = GaussianProcessLayer::new(
                self.layer_dims[i],
                self.layer_dims[i + 1],
                self.num_inducing,
            )
            .learning_rate(self.learning_rate)
            .unwrap()
            .noise_variance(self.noise_variance)
            .unwrap();

            self.layers.push(layer);
        }
    }

    /// Run the input through every layer in sequence, returning the final
    /// layer's output (raw logits, not probabilities).
    fn forward_pass(&self, X: &ArrayView2<f64>) -> Result<Array2<f64>> {
        let mut current_data = X.to_owned();

        for layer in self.layers.iter() {
            current_data = layer.predict(&current_data.view())?;
        }

        Ok(current_data)
    }

    /// Row-wise softmax with max-subtraction for numerical stability.
    /// Rows whose exponentials sum to zero are left unnormalized.
    fn softmax(&self, X: &ArrayView2<f64>) -> Array2<f64> {
        let mut result = X.to_owned();

        for mut row in result.rows_mut() {
            let max_val = row.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
            for val in row.iter_mut() {
                *val = (*val - max_val).exp();
            }
            let sum: f64 = row.sum();
            if sum > 0.0 {
                for val in row.iter_mut() {
                    *val /= sum;
                }
            }
        }

        result
    }

    /// One-hot encode the labels; negative labels (unlabeled samples) and
    /// out-of-range labels produce all-zero rows.
    fn encode_labels(&self, y: &ArrayView1<i32>) -> Array2<f64> {
        let n_samples = y.len();
        let mut encoded = Array2::zeros((n_samples, self.n_classes));

        for (i, &label) in y.iter().enumerate() {
            if label >= 0 {
                let class_idx = label as usize;
                if class_idx < self.n_classes {
                    encoded[[i, class_idx]] = 1.0;
                }
            }
        }

        encoded
    }

    /// Train the deep GP in a layer-wise fashion.
    ///
    /// Labels `< 0` mark unlabeled samples. Hidden layers are pre-trained on
    /// all samples with an identity-style reconstruction target; the final
    /// layer is trained only on the labeled subset against one-hot targets.
    ///
    /// # Errors
    /// Returns `ShapeMismatch` when `y` and `X` disagree in length, or
    /// `InsufficientLabeledSamples` when no label is `>= 0`.
    pub fn fit(&mut self, X: &ArrayView2<f64>, y: &ArrayView1<i32>) -> Result<()> {
        let (n_samples, n_features) = X.dim();

        if y.len() != n_samples {
            return Err(DeepGaussianProcessError::ShapeMismatch {
                expected: vec![n_samples],
                actual: vec![y.len()],
            }
            .into());
        }

        // Count labeled samples
        let labeled_mask: Vec<bool> = y.iter().map(|&label| label >= 0).collect();
        let n_labeled = labeled_mask.iter().filter(|&&x| x).count();

        if n_labeled == 0 {
            return Err(DeepGaussianProcessError::InsufficientLabeledSamples.into());
        }

        // Get unique classes from labeled data
        let mut classes: Vec<i32> = y.iter().filter(|&&label| label >= 0).cloned().collect();
        classes.sort_unstable();
        classes.dedup();
        self.n_classes = classes.len();

        // Force the architecture to match the data: input width from X,
        // output width from the discovered class count.
        if self.layer_dims.is_empty() {
            self.layer_dims = vec![n_features, n_features / 2, self.n_classes];
        } else {
            self.layer_dims[0] = n_features;
            if self.layer_dims.len() > 1 {
                *self.layer_dims.last_mut().unwrap() = self.n_classes;
            }
        }

        self.initialize_layers();

        // Create target matrix for labeled samples
        let y_encoded = self.encode_labels(y);

        // Layer-wise training with all data
        let mut current_data = X.to_owned();
        let num_layers = self.layers.len();

        for layer_idx in 0..num_layers {
            // Offset the seed per layer so layers do not pick identical
            // inducing-point subsets.
            let seed = self.random_state.map(|s| s + layer_idx as u64);

            if layer_idx == num_layers - 1 {
                // Last layer: train only on labeled data for classification
                let labeled_indices: Vec<usize> = labeled_mask
                    .iter()
                    .enumerate()
                    .filter(|(_, &is_labeled)| is_labeled)
                    .map(|(i, _)| i)
                    .collect();

                if !labeled_indices.is_empty() {
                    let X_labeled = labeled_indices
                        .iter()
                        .map(|&i| current_data.row(i))
                        .collect::<Vec<_>>();
                    let y_labeled = labeled_indices
                        .iter()
                        .map(|&i| y_encoded.row(i))
                        .collect::<Vec<_>>();

                    // Create arrays from the collected rows
                    let X_labeled_array =
                        Array2::from_shape_fn((X_labeled.len(), current_data.ncols()), |(i, j)| {
                            X_labeled[i][j]
                        });
                    let y_labeled_array =
                        Array2::from_shape_fn((y_labeled.len(), y_encoded.ncols()), |(i, j)| {
                            y_labeled[i][j]
                        });

                    self.layers[layer_idx].fit(
                        &X_labeled_array.view(),
                        &y_labeled_array.view(),
                        seed,
                    )?;
                }
            } else {
                // Hidden layers: unsupervised pre-training with all data
                // For simplicity, use identity mapping as target for unsupervised layers
                let target_dim = self.layers[layer_idx].output_dim;
                let identity_target =
                    Array2::from_shape_fn((current_data.nrows(), target_dim), |(i, j)| {
                        if j < current_data.ncols() {
                            current_data[[i, j]]
                        } else {
                            0.0
                        }
                    });

                self.layers[layer_idx].fit(&current_data.view(), &identity_target.view(), seed)?;
            }

            // Forward pass for next layer
            if layer_idx < num_layers - 1 {
                current_data = self.layers[layer_idx].predict(&current_data.view())?;
            }
        }

        self.is_trained = true;
        Ok(())
    }

    /// Class probabilities for each row of `X` (softmax over the final layer).
    ///
    /// # Errors
    /// Returns `ModelNotTrained` before `fit`.
    pub fn predict_proba(&self, X: &ArrayView2<f64>) -> Result<Array2<f64>> {
        if !self.is_trained {
            return Err(DeepGaussianProcessError::ModelNotTrained.into());
        }

        let logits = self.forward_pass(X)?;
        Ok(self.softmax(&logits.view()))
    }

    /// Hard class predictions: the argmax of `predict_proba` per row.
    pub fn predict(&self, X: &ArrayView2<f64>) -> Result<Array1<i32>> {
        let probabilities = self.predict_proba(X)?;
        let predictions = probabilities.map_axis(Axis(1), |row| {
            row.iter()
                .enumerate()
                // unwrap is safe for finite probabilities on a non-empty row
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                .map(|(idx, _)| idx as i32)
                .unwrap()
        });
        Ok(predictions)
    }

    /// Class predictions together with the last layer's per-class uncertainty.
    ///
    /// # Errors
    /// Returns `ModelNotTrained` before `fit` or when no layers exist.
    pub fn predict_with_uncertainty(
        &self,
        X: &ArrayView2<f64>,
    ) -> Result<(Array1<i32>, Array2<f64>)> {
        if !self.is_trained {
            return Err(DeepGaussianProcessError::ModelNotTrained.into());
        }

        // Get predictions with uncertainty from last layer
        let mut current_data = X.to_owned();
        // NOTE(review): this allocation is always overwritten before use below;
        // it only serves as a default and could be removed.
        let mut uncertainties = Array2::zeros((X.nrows(), self.n_classes));

        // Forward pass through all but last layer
        for layer in self.layers.iter().take(self.layers.len() - 1) {
            current_data = layer.predict(&current_data.view())?;
        }

        // Get predictions and uncertainties from last layer
        if let Some(last_layer) = self.layers.last() {
            let (logits, layer_uncertainties) =
                last_layer.predict_with_uncertainty(&current_data.view())?;
            let probabilities = self.softmax(&logits.view());
            let predictions = probabilities.map_axis(Axis(1), |row| {
                row.iter()
                    .enumerate()
                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                    .map(|(idx, _)| idx as i32)
                    .unwrap()
            });

            uncertainties = layer_uncertainties;
            Ok((predictions, uncertainties))
        } else {
            Err(DeepGaussianProcessError::ModelNotTrained.into())
        }
    }
}
705
/// A trained [`DeepGaussianProcess`] wrapper returned by the `Fit` trait;
/// all predictions delegate to the inner model.
#[derive(Debug, Clone)]
pub struct FittedDeepGaussianProcess {
    // The trained model produced by `DeepGaussianProcess::fit`.
    model: DeepGaussianProcess,
}
710
impl Fit<ArrayView2<'_, f64>, ArrayView1<'_, i32>, FittedDeepGaussianProcess>
    for DeepGaussianProcess
{
    type Fitted = FittedDeepGaussianProcess;

    /// Consume the configured model, train it on `X` / `y` (labels `< 0` are
    /// treated as unlabeled), and return the fitted wrapper.
    /// Delegates to the inherent `DeepGaussianProcess::fit`.
    fn fit(
        mut self,
        X: &ArrayView2<'_, f64>,
        y: &ArrayView1<'_, i32>,
    ) -> Result<FittedDeepGaussianProcess> {
        DeepGaussianProcess::fit(&mut self, X, y)?;
        Ok(FittedDeepGaussianProcess { model: self })
    }
}
725
impl Predict<ArrayView2<'_, f64>, Array1<i32>> for FittedDeepGaussianProcess {
    /// Hard class predictions; delegates to the trained inner model.
    fn predict(&self, X: &ArrayView2<'_, f64>) -> Result<Array1<i32>> {
        self.model.predict(X)
    }
}
731
impl PredictProba<ArrayView2<'_, f64>, Array2<f64>> for FittedDeepGaussianProcess {
    /// Per-class probabilities; delegates to the trained inner model.
    fn predict_proba(&self, X: &ArrayView2<'_, f64>) -> Result<Array2<f64>> {
        self.model.predict_proba(X)
    }
}
737
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::array;

    /// An RBF kernel must return exactly `variance` at zero distance and decay
    /// towards (but stay above) zero as the distance grows.
    #[test]
    fn test_kernel_computation() {
        let kernel = KernelType::RBF {
            length_scale: 1.0,
            variance: 1.0,
        };
        let x1 = array![1.0, 2.0];
        let x2 = array![1.0, 2.0];

        let result = kernel.compute(&x1.view(), &x2.view());
        assert_abs_diff_eq!(result, 1.0, epsilon = 1e-10);

        let x3 = array![3.0, 4.0];
        let result2 = kernel.compute(&x1.view(), &x3.view());
        assert!(result2 < 1.0 && result2 > 0.0);
    }

    /// Builder methods on a layer must store exactly the values given.
    #[test]
    fn test_gaussian_process_layer_creation() {
        let layer = GaussianProcessLayer::new(4, 2, 10)
            .learning_rate(0.01)
            .unwrap()
            .noise_variance(0.1)
            .unwrap();

        assert_eq!(layer.input_dim, 4);
        assert_eq!(layer.output_dim, 2);
        assert_eq!(layer.num_inducing, 10);
        assert_eq!(layer.learning_rate, 0.01);
        assert_eq!(layer.noise_variance, 0.1);
    }

    /// A single layer must fit on a tiny regression set and produce outputs
    /// (and uncertainties) of the expected shapes.
    #[test]
    #[allow(non_snake_case)]
    fn test_gaussian_process_layer_fit_predict() {
        let mut layer = GaussianProcessLayer::new(2, 1, 5)
            .learning_rate(0.01)
            .unwrap()
            .noise_variance(0.1)
            .unwrap();

        let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
        let y = array![[1.0], [2.0], [3.0], [4.0]];

        layer.fit(&X.view(), &y.view(), Some(42)).unwrap();
        assert!(layer.is_trained);

        let predictions = layer.predict(&X.view()).unwrap();
        assert_eq!(predictions.dim(), (4, 1));

        let (pred_mean, pred_var) = layer.predict_with_uncertainty(&X.view()).unwrap();
        assert_eq!(pred_mean.dim(), (4, 1));
        assert_eq!(pred_var.dim(), (4, 1));
    }

    /// The builder chain on the deep model must store every parameter.
    #[test]
    fn test_deep_gaussian_process_creation() {
        let dgp = DeepGaussianProcess::new()
            .layer_dims(vec![4, 3, 2])
            .unwrap()
            .num_inducing(10)
            .unwrap()
            .epochs(20)
            .unwrap()
            .learning_rate(0.01)
            .unwrap()
            .noise_variance(0.1)
            .unwrap()
            .random_state(42);

        assert_eq!(dgp.layer_dims, vec![4, 3, 2]);
        assert_eq!(dgp.num_inducing, 10);
        assert_eq!(dgp.epochs, 20);
        assert_eq!(dgp.learning_rate, 0.01);
        assert_eq!(dgp.noise_variance, 0.1);
        assert_eq!(dgp.random_state, Some(42));
    }

    /// End-to-end semi-supervised fit/predict: shapes must match and each
    /// probability row must (approximately) sum to 1.
    #[test]
    #[allow(non_snake_case)]
    fn test_deep_gaussian_process_fit_predict() {
        let dgp = DeepGaussianProcess::new()
            .layer_dims(vec![2, 3, 2])
            .unwrap()
            .num_inducing(5)
            .unwrap()
            .epochs(10)
            .unwrap()
            .random_state(42);

        let X = array![
            [1.0, 2.0],
            [2.0, 3.0],
            [3.0, 4.0],
            [4.0, 5.0],
            [5.0, 6.0],
            [6.0, 7.0]
        ];
        let y = array![0, 1, 0, 1, -1, -1]; // -1 indicates unlabeled

        let fitted = dgp.fit(&X.view(), &y.view()).unwrap();

        let predictions = fitted.predict(&X.view()).unwrap();
        assert_eq!(predictions.len(), 6);

        let probabilities = fitted.predict_proba(&X.view()).unwrap();
        assert_eq!(probabilities.dim(), (6, 2));

        // Check that probabilities sum to 1
        for i in 0..6 {
            let sum: f64 = probabilities.row(i).sum();
            assert!((sum - 1.0).abs() < 1e-5); // More lenient epsilon for GP
        }
    }

    /// Uncertainty output must match the (n_samples, n_classes) shape and
    /// contain only non-negative values.
    #[test]
    #[allow(non_snake_case)]
    fn test_deep_gaussian_process_with_uncertainty() {
        let dgp = DeepGaussianProcess::new()
            .layer_dims(vec![2, 2])
            .unwrap()
            .num_inducing(3)
            .unwrap()
            .epochs(5)
            .unwrap()
            .random_state(42);

        let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
        let y = array![0, 1, 0];

        let fitted = dgp.fit(&X.view(), &y.view()).unwrap();

        let (predictions, uncertainties) =
            fitted.model.predict_with_uncertainty(&X.view()).unwrap();
        assert_eq!(predictions.len(), 3);
        assert_eq!(uncertainties.dim(), (3, 2));

        // Check that uncertainties are positive
        for value in uncertainties.iter() {
            assert!(*value >= 0.0);
        }
    }

    /// Every builder must reject zero/negative values of its parameter.
    #[test]
    fn test_deep_gaussian_process_invalid_parameters() {
        assert!(DeepGaussianProcess::new().layer_dims(vec![]).is_err());
        assert!(DeepGaussianProcess::new().layer_dims(vec![10]).is_err());
        assert!(DeepGaussianProcess::new().num_inducing(0).is_err());
        assert!(DeepGaussianProcess::new().learning_rate(0.0).is_err());
        assert!(DeepGaussianProcess::new().learning_rate(-0.1).is_err());
        assert!(DeepGaussianProcess::new().noise_variance(0.0).is_err());
        assert!(DeepGaussianProcess::new().epochs(0).is_err());
    }

    /// Fitting on an entirely-unlabeled dataset must fail.
    #[test]
    #[allow(non_snake_case)]
    fn test_deep_gaussian_process_insufficient_labeled_samples() {
        let dgp = DeepGaussianProcess::new()
            .layer_dims(vec![2, 2])
            .unwrap()
            .epochs(5)
            .unwrap();

        let X = array![[1.0, 2.0], [2.0, 3.0]];
        let y = array![-1, -1]; // All unlabeled

        let result = dgp.fit(&X.view(), &y.view());
        assert!(result.is_err());
    }

    /// All four kernel types must produce values in their valid ranges for a
    /// pair of distinct points.
    #[test]
    fn test_kernel_types() {
        let x1 = array![1.0, 2.0];
        let x2 = array![2.0, 3.0];

        let rbf = KernelType::RBF {
            length_scale: 1.0,
            variance: 1.0,
        };
        let matern32 = KernelType::Matern32 {
            length_scale: 1.0,
            variance: 1.0,
        };
        let matern52 = KernelType::Matern52 {
            length_scale: 1.0,
            variance: 1.0,
        };
        let linear = KernelType::Linear {
            variance: 1.0,
            offset: 0.0,
        };

        let rbf_val = rbf.compute(&x1.view(), &x2.view());
        let matern32_val = matern32.compute(&x1.view(), &x2.view());
        let matern52_val = matern52.compute(&x1.view(), &x2.view());
        let linear_val = linear.compute(&x1.view(), &x2.view());

        assert!(rbf_val > 0.0 && rbf_val <= 1.0);
        assert!(matern32_val > 0.0 && matern32_val <= 1.0);
        assert!(matern52_val > 0.0 && matern52_val <= 1.0);
        assert!(linear_val > 0.0);
    }
}