sklears_kernel_approximation/kernel_ridge_regression/mod.rs

//! Kernel Ridge Regression Module
//!
//! This module provides comprehensive kernel ridge regression implementations with
//! various approximation methods, regularization strategies, and robust variants.
//!
//! ## Overview
//!
//! Kernel ridge regression extends linear ridge regression to nonlinear problems
//! by using kernel methods. This module includes:
//!
//! - **Basic Kernel Ridge Regression**: Standard implementation with multiple solvers
//! - **Multi-Task Learning**: Joint learning across multiple related regression tasks
//! - **Robust Variants**: Resistance to outliers using robust loss functions
//! - **Kernel Approximation**: Efficient large-scale methods using feature approximations
//!
//! ## Kernel Approximation Methods
//!
//! All implementations support multiple kernel approximation methods for scalability
//! (a construction sketch follows this list):
//!
//! - **Nyström Method**: Landmark-based kernel approximation
//! - **Random Fourier Features**: Approximate shift-invariant kernels
//! - **Structured Random Features**: Structured transforms that reduce the cost of RFF
//! - **Fastfood Transform**: Fast Walsh-Hadamard-based approximation
//!
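//! The `ApproximationMethod` variants used throughout this module can be constructed
//! directly. A minimal sketch (the field sets follow the constructors used elsewhere
//! in this module; the `Kernel::Rbf` constructor shown for Nyström is hypothetical
//! and may differ in your version):
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     ApproximationMethod, Kernel, SamplingStrategy,
//! };
//!
//! // Random Fourier Features for shift-invariant kernels.
//! let rff = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 100,
//!     gamma: 1.0,
//! };
//!
//! // Nyström approximation with randomly sampled landmarks.
//! let nystroem = ApproximationMethod::Nystroem {
//!     kernel: Kernel::Rbf { gamma: 1.0 }, // hypothetical kernel constructor
//!     n_components: 100,
//!     sampling_strategy: SamplingStrategy::Random,
//! };
//! ```
//!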
//! ## Architecture
//!
//! The module is organized into focused submodules (an import sketch follows this list):
//!
//! - [`core_types`] - Shared types, enums, and utility functions
//! - [`basic_regression`] - Standard kernel ridge regression
//! - [`multitask_regression`] - Multi-task learning capabilities
//! - [`robust_regression`] - Robust variants for outlier resistance
//!
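//! Everything above is also re-exported at the module root, so submodule paths
//! rarely need to be written out. A minimal sketch:
//!
//! ```rust,ignore
//! // Through the module-root re-export (most common):
//! use sklears_kernel_approximation::kernel_ridge_regression::KernelRidgeRegression;
//!
//! // Or directly from the submodule that defines it:
//! // use sklears_kernel_approximation::kernel_ridge_regression::basic_regression::KernelRidgeRegression;
//! ```
//!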
//! ## Examples
//!
//! ### Basic Usage
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     KernelRidgeRegression, ApproximationMethod
//! };
//! use sklears_core::{Fit, Predict};
//! use scirs2_core::ndarray::array;
//!
//! // Prepare data
//! let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
//! let y = array![1.0, 4.0, 9.0, 16.0];
//!
//! // Create approximation method
//! let approximation = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 100,
//!     gamma: 1.0,
//! };
//!
//! // Create and fit model
//! let krr = KernelRidgeRegression::new(approximation)
//!     .alpha(0.1)
//!     .random_state(42);
//!
//! let fitted_model = krr.fit(&X, &y)?;
//! let predictions = fitted_model.predict(&X)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! ### Multi-Task Learning
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     MultiTaskKernelRidgeRegression, ApproximationMethod, TaskRegularization
//! };
//! use sklears_core::{Fit, Predict};
//! use scirs2_core::ndarray::array;
//!
//! let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
//! let Y = array![[1.0, 2.0], [4.0, 5.0], [9.0, 10.0]]; // Two related tasks
//!
//! let approximation = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 50,
//!     gamma: 1.0,
//! };
//!
//! let mtkrr = MultiTaskKernelRidgeRegression::new(approximation)
//!     .alpha(0.1)
//!     .task_regularization(TaskRegularization::L2 { beta: 0.01 });
//!
//! let fitted_model = mtkrr.fit(&X, &Y)?;
//! let predictions = fitted_model.predict(&X)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! ### Robust Regression
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     RobustKernelRidgeRegression, ApproximationMethod, RobustLoss
//! };
//! use sklears_core::{Fit, Predict};
//! use scirs2_core::ndarray::array;
//!
//! let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [10.0, 10.0]]; // Last point is an outlier
//! let y = array![1.0, 2.0, 3.0, 100.0]; // Last target is an outlier
//!
//! let approximation = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 50,
//!     gamma: 1.0,
//! };
//!
//! let robust_krr = RobustKernelRidgeRegression::new(approximation)
//!     .alpha(0.1)
//!     .robust_loss(RobustLoss::Huber { delta: 1.0 });
//!
//! let fitted_model = robust_krr.fit(&X, &y)?;
//! let predictions = fitted_model.predict(&X)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```

// Module declarations
pub mod basic_regression;
pub mod core_types;
pub mod multitask_regression;
pub mod robust_regression;

// Re-export core types and utilities
pub use crate::Untrained;
pub use core_types::{
    ApproximationMethod, FastfoodTransform, FeatureTransformer, Float, Kernel, Nystroem,
    RBFSampler, SamplingStrategy, Solver, StructuredRandomFeatures, Trained,
};

// Re-export basic kernel ridge regression
pub use basic_regression::{IntoUntrained, KernelRidgeRegression, OnlineKernelRidgeRegression};

// Re-export multi-task kernel ridge regression
pub use multitask_regression::{MultiTaskKernelRidgeRegression, TaskRegularization};

// Re-export robust kernel ridge regression
pub use robust_regression::{RobustKernelRidgeRegression, RobustLoss};

/// Prelude module for convenient imports
///
/// This module re-exports the most commonly used types and traits
/// for kernel ridge regression.
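///
/// # Examples
///
/// A minimal sketch of a glob import (all names shown come from the re-exports below):
///
/// ```rust,ignore
/// use sklears_kernel_approximation::kernel_ridge_regression::prelude::*;
///
/// let approximation = ApproximationMethod::RandomFourierFeatures {
///     n_components: 100,
///     gamma: 1.0,
/// };
/// let model = KernelRidgeRegression::new(approximation).alpha(0.1);
/// ```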
pub mod prelude {
    pub use super::basic_regression::{KernelRidgeRegression, OnlineKernelRidgeRegression};
    pub use super::core_types::{ApproximationMethod, Kernel, SamplingStrategy, Solver};
    pub use super::multitask_regression::{MultiTaskKernelRidgeRegression, TaskRegularization};
    pub use super::robust_regression::{RobustKernelRidgeRegression, RobustLoss};
    pub use sklears_core::prelude::{Estimator, Fit, Predict};
}

/// Utility functions for kernel ridge regression
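///
/// # Examples
///
/// A minimal sketch using one of the factory helpers defined in this module
/// (data values are illustrative):
///
/// ```rust,ignore
/// use sklears_kernel_approximation::kernel_ridge_regression::utils::create_rff_model;
/// use sklears_core::{Fit, Predict};
/// use scirs2_core::ndarray::array;
///
/// let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
/// let y = array![1.0, 4.0, 9.0];
///
/// // 100 random Fourier features, gamma = 1.0, ridge penalty alpha = 0.1.
/// let model = create_rff_model(100, 1.0, 0.1);
/// let fitted = model.fit(&X, &y)?;
/// let predictions = fitted.predict(&X)?;
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```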
pub mod utils {
    use super::basic_regression::KernelRidgeRegression;
    use super::core_types::*;
    use super::multitask_regression::{MultiTaskKernelRidgeRegression, TaskRegularization};
    use super::robust_regression::{RobustKernelRidgeRegression, RobustLoss};
    use super::Untrained;
    use scirs2_core::ndarray::{Array1, Array2};
    use sklears_core::error::Result;
    use sklears_core::prelude::Float;

    /// Create a basic kernel ridge regression model with Random Fourier Features
    pub fn create_rff_model(
        n_components: usize,
        gamma: Float,
        alpha: Float,
    ) -> KernelRidgeRegression<Untrained> {
        let approximation = ApproximationMethod::RandomFourierFeatures {
            n_components,
            gamma,
        };
        KernelRidgeRegression::new(approximation).alpha(alpha)
    }

    /// Create a basic kernel ridge regression model with Nyström approximation
    pub fn create_nystroem_model(
        kernel: Kernel,
        n_components: usize,
        alpha: Float,
    ) -> KernelRidgeRegression<Untrained> {
        let approximation = ApproximationMethod::Nystroem {
            kernel,
            n_components,
            sampling_strategy: SamplingStrategy::Random,
        };
        KernelRidgeRegression::new(approximation).alpha(alpha)
    }

    /// Create a multi-task model with L2 task regularization
    pub fn create_multitask_l2_model(
        approximation: ApproximationMethod,
        alpha: Float,
        beta: Float,
    ) -> MultiTaskKernelRidgeRegression<Untrained> {
        MultiTaskKernelRidgeRegression::new(approximation)
            .alpha(alpha)
            .task_regularization(TaskRegularization::L2 { beta })
    }

    /// Create a robust model with Huber loss
    pub fn create_robust_huber_model(
        approximation: ApproximationMethod,
        alpha: Float,
        delta: Float,
    ) -> RobustKernelRidgeRegression<Untrained> {
        RobustKernelRidgeRegression::new(approximation)
            .alpha(alpha)
            .robust_loss(RobustLoss::Huber { delta })
    }

    /// Validate input dimensions for kernel ridge regression
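    ///
    /// # Examples
    ///
    /// A minimal sketch of the sample-count check (shapes chosen for illustration):
    ///
    /// ```rust,ignore
    /// use sklears_kernel_approximation::kernel_ridge_regression::utils::validate_input_dimensions;
    /// use scirs2_core::ndarray::array;
    ///
    /// let x = array![[1.0, 2.0], [3.0, 4.0]]; // 2 samples
    /// let y = array![1.0, 2.0, 3.0];          // 3 targets: sample counts do not match
    /// assert!(validate_input_dimensions(&x, &y).is_err());
    ///
    /// let y_ok = array![1.0, 2.0];
    /// assert!(validate_input_dimensions(&x, &y_ok).is_ok());
    /// ```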
    pub fn validate_input_dimensions(x: &Array2<Float>, y: &Array1<Float>) -> Result<()> {
        if x.nrows() != y.len() {
            return Err(sklears_core::error::SklearsError::InvalidInput(format!(
                "Number of samples in X ({}) and y ({}) must match",
                x.nrows(),
                y.len()
            )));
        }

        if x.nrows() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input arrays must not be empty".to_string(),
            ));
        }

        if x.ncols() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input must have at least one feature".to_string(),
            ));
        }

        Ok(())
    }

    /// Validate input dimensions for multi-task kernel ridge regression
    pub fn validate_multitask_input_dimensions(x: &Array2<Float>, y: &Array2<Float>) -> Result<()> {
        if x.nrows() != y.nrows() {
            return Err(sklears_core::error::SklearsError::InvalidInput(format!(
                "Number of samples in X ({}) and Y ({}) must match",
                x.nrows(),
                y.nrows()
            )));
        }

        if x.nrows() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input arrays must not be empty".to_string(),
            ));
        }

        if x.ncols() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input must have at least one feature".to_string(),
            ));
        }

        if y.ncols() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Must have at least one task".to_string(),
            ));
        }

        Ok(())
    }
}