sklears_kernel_approximation/kernel_ridge_regression/mod.rs
//! Kernel Ridge Regression Module
//!
//! This module provides comprehensive kernel ridge regression implementations with
//! various approximation methods, regularization strategies, and robust variants.
//!
//! ## Overview
//!
//! Kernel ridge regression extends linear ridge regression to nonlinear problems
//! by using kernel methods. This module includes:
//!
//! - **Basic Kernel Ridge Regression**: Standard implementation with multiple solvers
//! - **Multi-Task Learning**: Joint learning across multiple related regression tasks
//! - **Robust Variants**: Resistance to outliers using robust loss functions
//! - **Kernel Approximation**: Efficient large-scale methods using feature approximations
//!
//! ## Kernel Approximation Methods
//!
//! All implementations support multiple kernel approximation methods for scalability
//! (a construction sketch follows the list):
//!
//! - **Nyström Method**: Landmark-based, low-rank kernel approximation
//! - **Random Fourier Features**: Explicit feature maps that approximate shift-invariant kernels
//! - **Structured Random Features**: Structured projections that reduce the cost of random Fourier features
//! - **Fastfood Transform**: Fast Walsh-Hadamard-based approximation
//!
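//! The sketch below shows how two of these variants are constructed. Field names follow
//! the builder helpers in the [`utils`] module; the remaining variants are listed on
//! [`core_types::ApproximationMethod`].
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     ApproximationMethod, SamplingStrategy,
//! };
//!
//! // Random Fourier Features: explicit feature map of a given width
//! let rff = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 100,
//!     gamma: 1.0,
//! };
//!
//! // Nyström: low-rank approximation built from randomly sampled landmarks
//! let nystroem = ApproximationMethod::Nystroem {
//!     kernel, // any `Kernel` value from `core_types`
//!     n_components: 100,
//!     sampling_strategy: SamplingStrategy::Random,
//! };
//! ```
//!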
//! ## Architecture
//!
//! The module is organized into focused submodules:
//!
//! - [`core_types`] - Shared types, enums, and utility functions
//! - [`basic_regression`] - Standard kernel ridge regression
//! - [`multitask_regression`] - Multi-task learning capabilities
//! - [`robust_regression`] - Robust variants for outlier resistance
//!
//! ## Examples
//!
//! ### Basic Usage
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     KernelRidgeRegression, ApproximationMethod,
//! };
//! use sklears_core::{Fit, Predict};
//! use scirs2_core::ndarray::array;
//!
//! // Prepare data
//! let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
//! let y = array![1.0, 4.0, 9.0, 16.0];
//!
//! // Create approximation method
//! let approximation = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 100,
//!     gamma: 1.0,
//! };
//!
//! // Create and fit model
//! let krr = KernelRidgeRegression::new(approximation)
//!     .alpha(0.1)
//!     .random_state(42);
//!
//! let fitted_model = krr.fit(&X, &y)?;
//! let predictions = fitted_model.predict(&X)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! ### Multi-Task Learning
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     MultiTaskKernelRidgeRegression, ApproximationMethod, TaskRegularization,
//! };
//! use sklears_core::{Fit, Predict};
//! use scirs2_core::ndarray::array;
//!
//! let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
//! let Y = array![[1.0, 2.0], [4.0, 5.0], [9.0, 10.0]]; // Two related tasks
//!
//! let approximation = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 50,
//!     gamma: 1.0,
//! };
//!
//! let mtkrr = MultiTaskKernelRidgeRegression::new(approximation)
//!     .alpha(0.1)
//!     .task_regularization(TaskRegularization::L2 { beta: 0.01 });
//!
//! let fitted_model = mtkrr.fit(&X, &Y)?;
//! let predictions = fitted_model.predict(&X)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! ### Robust Regression
//!
//! ```rust,ignore
//! use sklears_kernel_approximation::kernel_ridge_regression::{
//!     RobustKernelRidgeRegression, ApproximationMethod, RobustLoss,
//! };
//! use sklears_core::{Fit, Predict};
//! use scirs2_core::ndarray::array;
//!
//! let X = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [10.0, 10.0]]; // The last point is an outlier
//! let y = array![1.0, 2.0, 3.0, 100.0]; // The last target is an outlier
//!
//! let approximation = ApproximationMethod::RandomFourierFeatures {
//!     n_components: 50,
//!     gamma: 1.0,
//! };
//!
//! let robust_krr = RobustKernelRidgeRegression::new(approximation)
//!     .alpha(0.1)
//!     .robust_loss(RobustLoss::Huber { delta: 1.0 });
//!
//! let fitted_model = robust_krr.fit(&X, &y)?;
//! let predictions = fitted_model.predict(&X)?;
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```

// Module declarations
pub mod basic_regression;
pub mod core_types;
pub mod multitask_regression;
pub mod robust_regression;

// Re-export core types and utilities
pub use crate::Untrained;
pub use core_types::{
    ApproximationMethod, FastfoodTransform, FeatureTransformer, Float, Kernel, Nystroem,
    RBFSampler, SamplingStrategy, Solver, StructuredRandomFeatures, Trained,
};

// Re-export basic kernel ridge regression
pub use basic_regression::{IntoUntrained, KernelRidgeRegression, OnlineKernelRidgeRegression};

// Re-export multi-task kernel ridge regression
pub use multitask_regression::{MultiTaskKernelRidgeRegression, TaskRegularization};

// Re-export robust kernel ridge regression
pub use robust_regression::{RobustKernelRidgeRegression, RobustLoss};

/// Prelude module for convenient imports
///
/// This module re-exports the most commonly used types and traits
/// for kernel ridge regression.
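///
/// A minimal import sketch; every name comes from the re-exports listed below:
///
/// ```rust,ignore
/// use sklears_kernel_approximation::kernel_ridge_regression::prelude::*;
///
/// let approximation = ApproximationMethod::RandomFourierFeatures {
///     n_components: 100,
///     gamma: 1.0,
/// };
/// let krr = KernelRidgeRegression::new(approximation).alpha(0.1);
/// ```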
pub mod prelude {
    pub use super::basic_regression::{KernelRidgeRegression, OnlineKernelRidgeRegression};
    pub use super::core_types::{ApproximationMethod, Kernel, SamplingStrategy, Solver};
    pub use super::multitask_regression::{MultiTaskKernelRidgeRegression, TaskRegularization};
    pub use super::robust_regression::{RobustKernelRidgeRegression, RobustLoss};
    pub use sklears_core::prelude::{Estimator, Fit, Predict};
}

/// Utility functions for kernel ridge regression
pub mod utils {
    use super::core_types::*;
    use super::multitask_regression::TaskRegularization;
    use super::robust_regression::RobustLoss;
    use super::Untrained;
    use scirs2_core::ndarray::{Array1, Array2};
    use sklears_core::error::Result;
    use sklears_core::prelude::Float;

    /// Create a basic kernel ridge regression model with Random Fourier Features
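    ///
    /// A minimal usage sketch (the numeric values are illustrative only):
    ///
    /// ```rust,ignore
    /// // 200 random Fourier features, kernel width gamma = 0.5, ridge penalty alpha = 0.1
    /// let model = create_rff_model(200, 0.5, 0.1);
    /// ```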
    pub fn create_rff_model(
        n_components: usize,
        gamma: Float,
        alpha: Float,
    ) -> KernelRidgeRegression<Untrained> {
        let approximation = ApproximationMethod::RandomFourierFeatures {
            n_components,
            gamma,
        };
        KernelRidgeRegression::new(approximation).alpha(alpha)
    }

    /// Create a basic kernel ridge regression model with Nyström approximation
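    ///
    /// A minimal sketch, where `kernel` stands for any [`Kernel`] value from `core_types`:
    ///
    /// ```rust,ignore
    /// // 100 randomly sampled landmark points, ridge penalty alpha = 0.1
    /// let model = create_nystroem_model(kernel, 100, 0.1);
    /// ```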
    pub fn create_nystroem_model(
        kernel: Kernel,
        n_components: usize,
        alpha: Float,
    ) -> KernelRidgeRegression<Untrained> {
        let approximation = ApproximationMethod::Nystroem {
            kernel,
            n_components,
            sampling_strategy: SamplingStrategy::Random,
        };
        KernelRidgeRegression::new(approximation).alpha(alpha)
    }

    /// Create a multi-task model with L2 task regularization
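    ///
    /// A minimal sketch with illustrative regularization strengths:
    ///
    /// ```rust,ignore
    /// let approximation = ApproximationMethod::RandomFourierFeatures {
    ///     n_components: 50,
    ///     gamma: 1.0,
    /// };
    /// // alpha = 0.1 (ridge penalty), beta = 0.01 (L2 task regularization strength)
    /// let model = create_multitask_l2_model(approximation, 0.1, 0.01);
    /// ```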
    pub fn create_multitask_l2_model(
        approximation: ApproximationMethod,
        alpha: Float,
        beta: Float,
    ) -> MultiTaskKernelRidgeRegression<Untrained> {
        MultiTaskKernelRidgeRegression::new(approximation)
            .alpha(alpha)
            .task_regularization(TaskRegularization::L2 { beta })
    }

    /// Create a robust model with Huber loss
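    ///
    /// A minimal sketch; `delta` is the Huber transition point between the quadratic
    /// and linear regions of the loss:
    ///
    /// ```rust,ignore
    /// let approximation = ApproximationMethod::RandomFourierFeatures {
    ///     n_components: 50,
    ///     gamma: 1.0,
    /// };
    /// let model = create_robust_huber_model(approximation, 0.1, 1.0);
    /// ```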
    pub fn create_robust_huber_model(
        approximation: ApproximationMethod,
        alpha: Float,
        delta: Float,
    ) -> RobustKernelRidgeRegression<Untrained> {
        RobustKernelRidgeRegression::new(approximation)
            .alpha(alpha)
            .robust_loss(RobustLoss::Huber { delta })
    }

    /// Validate input dimensions for kernel ridge regression
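    ///
    /// A minimal sketch of a shape check before fitting:
    ///
    /// ```rust,ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let x = array![[1.0, 2.0], [2.0, 3.0]];
    /// let y = array![1.0, 2.0];
    /// validate_input_dimensions(&x, &y)?; // Ok: both have 2 samples
    /// ```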
    pub fn validate_input_dimensions(x: &Array2<Float>, y: &Array1<Float>) -> Result<()> {
        if x.nrows() != y.len() {
            return Err(sklears_core::error::SklearsError::InvalidInput(format!(
                "Number of samples in X ({}) and y ({}) must match",
                x.nrows(),
                y.len()
            )));
        }

        if x.nrows() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input arrays must not be empty".to_string(),
            ));
        }

        if x.ncols() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input must have at least one feature".to_string(),
            ));
        }

        Ok(())
    }

    /// Validate input dimensions for multi-task kernel ridge regression
    pub fn validate_multitask_input_dimensions(x: &Array2<Float>, y: &Array2<Float>) -> Result<()> {
        if x.nrows() != y.nrows() {
            return Err(sklears_core::error::SklearsError::InvalidInput(format!(
                "Number of samples in X ({}) and Y ({}) must match",
                x.nrows(),
                y.nrows()
            )));
        }

        if x.nrows() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input arrays must not be empty".to_string(),
            ));
        }

        if x.ncols() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Input must have at least one feature".to_string(),
            ));
        }

        if y.ncols() == 0 {
            return Err(sklears_core::error::SklearsError::InvalidInput(
                "Must have at least one task".to_string(),
            ));
        }

        Ok(())
    }

    use super::basic_regression::KernelRidgeRegression;
    use super::multitask_regression::MultiTaskKernelRidgeRegression;
    use super::robust_regression::RobustKernelRidgeRegression;
269}