rustkernel_ml/regression.rs

//! Regression kernels.
//!
//! This module provides regression algorithms:
//! - Linear regression (OLS via normal equations)
//! - Ridge regression (L2 regularization)

use crate::types::{DataMatrix, RegressionResult};
use rustkernel_core::{domain::Domain, kernel::KernelMetadata, traits::GpuKernel};

// ============================================================================
// Linear Regression Kernel
// ============================================================================

/// Linear regression kernel.
///
/// Ordinary Least Squares regression using the normal equations:
/// β = (X^T X)^(-1) X^T y
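///
/// # Example
///
/// A minimal usage sketch, mirroring the unit tests below; it assumes
/// `DataMatrix::from_rows` builds a row-major matrix as it does in the tests,
/// and the import paths are assumptions about how the crate exposes these types.
///
/// ```ignore
/// // Paths assumed; adjust to how the crate re-exports these types.
/// use rustkernel_ml::regression::LinearRegression;
/// use rustkernel_ml::types::DataMatrix;
///
/// // Exact linear data: y = 2*x1 + 3*x2 + 1.
/// let x = DataMatrix::from_rows(&[
///     &[1.0, 1.0],
///     &[2.0, 1.0],
///     &[1.0, 2.0],
///     &[2.0, 2.0],
/// ]);
/// let y = vec![6.0, 8.0, 9.0, 11.0];
/// let fit = LinearRegression::compute(&x, &y, true);
/// assert!((fit.coefficients[0] - 2.0).abs() < 1e-6);
/// assert!((fit.coefficients[1] - 3.0).abs() < 1e-6);
/// assert!((fit.intercept - 1.0).abs() < 1e-6);
/// ```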
#[derive(Debug, Clone)]
pub struct LinearRegression {
    metadata: KernelMetadata,
}

impl Default for LinearRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl LinearRegression {
    /// Create a new linear regression kernel.
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/linear-regression", Domain::StatisticalML)
                .with_description("OLS linear regression via normal equations")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

    /// Fit linear regression model.
    ///
    /// # Arguments
    /// * `x` - Feature matrix (n_samples x n_features)
    /// * `y` - Target vector (n_samples)
    /// * `fit_intercept` - Whether to fit an intercept term
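    ///
    /// When `fit_intercept` is true, a column of ones is prepended to `x`, the
    /// augmented system is solved, and the first solved coefficient is reported
    /// separately as the intercept.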
    pub fn compute(x: &DataMatrix, y: &[f64], fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Optionally add intercept column
        let (x_aug, d_aug) = if fit_intercept {
            let mut aug_data = Vec::with_capacity(n * (d + 1));
            for i in 0..n {
                aug_data.push(1.0); // Intercept column
                aug_data.extend_from_slice(x.row(i));
            }
            (DataMatrix::new(aug_data, n, d + 1), d + 1)
        } else {
            (x.clone(), d)
        };

        // Compute X^T X
        let xtx = Self::matrix_multiply_transpose_left(&x_aug);

        // Compute X^T y
        let xty = Self::matrix_vector_multiply_transpose(&x_aug, y);

        // Solve (X^T X) β = X^T y using Cholesky decomposition
        let coefficients = match Self::solve_positive_definite(&xtx, &xty, d_aug) {
            Some(c) => c,
            None => vec![0.0; d_aug], // Fall back to zeros if singular
        };

        // Extract intercept if fitted
        let (intercept, coefs) = if fit_intercept {
            (coefficients[0], coefficients[1..].to_vec())
        } else {
            (0.0, coefficients)
        };

        // Compute predictions and R²
        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept + coefs.iter().zip(xi.iter()).map(|(c, x)| c * x).sum::<f64>()
            })
            .collect();

        let r2 = Self::compute_r2(y, &predictions);

        RegressionResult {
            coefficients: coefs,
            intercept,
            r2_score: r2,
        }
    }

    /// Compute X^T X (symmetric positive semi-definite).
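    ///
    /// Only the lower triangle is accumulated; the upper triangle is filled by
    /// mirroring, since the Gram matrix is symmetric.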
    fn matrix_multiply_transpose_left(x: &DataMatrix) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d * d];

        for i in 0..d {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..n {
                    sum += x.get(k, i) * x.get(k, j);
                }
                result[i * d + j] = sum;
                result[j * d + i] = sum; // Symmetric
            }
        }

        result
    }

    /// Compute X^T y.
    #[allow(clippy::needless_range_loop)]
    fn matrix_vector_multiply_transpose(x: &DataMatrix, y: &[f64]) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d];

        for j in 0..d {
            for i in 0..n {
                result[j] += x.get(i, j) * y[i];
            }
        }

        result
    }

    /// Solve Ax = b for positive definite A using Cholesky decomposition.
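    ///
    /// The solve proceeds in three steps: factor A = L·L^T, forward-substitute
    /// L·y = b, then back-substitute L^T·x = y. A tiny diagonal regularization
    /// (1e-10) is applied when a pivot is non-positive; if that still fails,
    /// `None` is returned.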
    fn solve_positive_definite(a: &[f64], b: &[f64], n: usize) -> Option<Vec<f64>> {
        // Cholesky decomposition: A = L L^T
        let mut l = vec![0.0f64; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = a[i * n + j];
                for k in 0..j {
                    sum -= l[i * n + k] * l[j * n + k];
                }

                if i == j {
                    if sum <= 0.0 {
                        // Add regularization for numerical stability
                        let reg_sum = sum + 1e-10;
                        if reg_sum <= 0.0 {
                            return None;
                        }
                        l[i * n + j] = reg_sum.sqrt();
                    } else {
                        l[i * n + j] = sum.sqrt();
                    }
                } else {
                    l[i * n + j] = sum / l[j * n + j];
                }
            }
        }

        // Forward substitution: L y = b
        let mut y = vec![0.0f64; n];
        for i in 0..n {
            let mut sum = b[i];
            for j in 0..i {
                sum -= l[i * n + j] * y[j];
            }
            y[i] = sum / l[i * n + i];
        }

        // Backward substitution: L^T x = y
        let mut x = vec![0.0f64; n];
        for i in (0..n).rev() {
            let mut sum = y[i];
            for j in (i + 1)..n {
                sum -= l[j * n + i] * x[j]; // L^T[i][j] = L[j][i]
            }
            x[i] = sum / l[i * n + i];
        }

        Some(x)
    }

    /// Compute R² score.
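    ///
    /// R² = 1 - SS_res / SS_tot, where SS_res = Σ(yᵢ - ŷᵢ)² and
    /// SS_tot = Σ(yᵢ - ȳ)². A constant target with zero residual returns 1.0.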
    fn compute_r2(y_true: &[f64], y_pred: &[f64]) -> f64 {
        let n = y_true.len();
        if n == 0 {
            return 0.0;
        }

        let y_mean: f64 = y_true.iter().sum::<f64>() / n as f64;

        let ss_tot: f64 = y_true.iter().map(|&y| (y - y_mean).powi(2)).sum();
        let ss_res: f64 = y_true
            .iter()
            .zip(y_pred.iter())
            .map(|(&yt, &yp)| (yt - yp).powi(2))
            .sum();

        if ss_tot == 0.0 {
            return if ss_res == 0.0 { 1.0 } else { 0.0 };
        }

        1.0 - ss_res / ss_tot
    }
}

impl GpuKernel for LinearRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

// ============================================================================
// Ridge Regression Kernel
// ============================================================================

/// Ridge regression kernel.
///
/// Linear regression with L2 regularization:
/// β = (X^T X + αI)^(-1) X^T y
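///
/// # Example
///
/// A minimal usage sketch, under the same path assumptions as the
/// `LinearRegression` example above; `alpha` controls the strength of the
/// L2 penalty.
///
/// ```ignore
/// let x = DataMatrix::from_rows(&[
///     &[1.0, 1.0],
///     &[2.0, 1.0],
///     &[1.0, 2.0],
///     &[2.0, 2.0],
/// ]);
/// let y = vec![6.0, 8.0, 9.0, 11.0]; // y = 2*x1 + 3*x2 + 1
/// // A small alpha stays close to OLS; a large alpha shrinks the slopes toward 0.
/// let fit = RidgeRegression::compute(&x, &y, 0.001, true);
/// assert!((fit.coefficients[0] - 2.0).abs() < 0.1);
/// assert!((fit.coefficients[1] - 3.0).abs() < 0.1);
/// assert!(fit.r2_score > 0.95);
/// ```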
#[derive(Debug, Clone)]
pub struct RidgeRegression {
    metadata: KernelMetadata,
}

impl Default for RidgeRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl RidgeRegression {
    /// Create a new ridge regression kernel.
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/ridge-regression", Domain::StatisticalML)
                .with_description("Ridge regression with L2 regularization")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

    /// Fit ridge regression model.
    ///
    /// # Arguments
    /// * `x` - Feature matrix (n_samples x n_features)
    /// * `y` - Target vector (n_samples)
    /// * `alpha` - Regularization strength
    /// * `fit_intercept` - Whether to fit an intercept term
    #[allow(clippy::needless_range_loop)]
    pub fn compute(x: &DataMatrix, y: &[f64], alpha: f64, fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Center data if fitting intercept (standard approach for Ridge)
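        // Centering keeps the intercept out of the L2 penalty: slopes are fit on
        // centered X and y, and the intercept is recovered afterwards as
        // y_mean - Σ(β_j * x_mean_j).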
        let (x_centered, y_centered, x_mean, y_mean) = if fit_intercept {
            let x_mean: Vec<f64> = (0..d)
                .map(|j| (0..n).map(|i| x.get(i, j)).sum::<f64>() / n as f64)
                .collect();
            let y_mean: f64 = y.iter().sum::<f64>() / n as f64;

            let mut x_data = Vec::with_capacity(n * d);
            for i in 0..n {
                for j in 0..d {
                    x_data.push(x.get(i, j) - x_mean[j]);
                }
            }

            let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

            (DataMatrix::new(x_data, n, d), y_centered, x_mean, y_mean)
        } else {
            (x.clone(), y.to_vec(), vec![0.0; d], 0.0)
        };

        // Compute X^T X + αI
        let mut xtx = LinearRegression::matrix_multiply_transpose_left(&x_centered);

        // Add regularization to diagonal
        for i in 0..d {
            xtx[i * d + i] += alpha;
        }

        // Compute X^T y
        let xty = LinearRegression::matrix_vector_multiply_transpose(&x_centered, &y_centered);

        // Solve (X^T X + αI) β = X^T y
        let coefficients = match LinearRegression::solve_positive_definite(&xtx, &xty, d) {
            Some(c) => c,
            None => vec![0.0; d],
        };

        // Compute intercept: b = y_mean - Σ(β_j * x_mean_j)
        let intercept = if fit_intercept {
            y_mean
                - coefficients
                    .iter()
                    .zip(x_mean.iter())
                    .map(|(c, m)| c * m)
                    .sum::<f64>()
        } else {
            0.0
        };

        // Compute predictions and R²
        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept
                    + coefficients
                        .iter()
                        .zip(xi.iter())
                        .map(|(c, x)| c * x)
                        .sum::<f64>()
            })
            .collect();

        let r2 = LinearRegression::compute_r2(y, &predictions);

        RegressionResult {
            coefficients,
            intercept,
            r2_score: r2,
        }
    }
}

impl GpuKernel for RidgeRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn create_linear_data() -> (DataMatrix, Vec<f64>) {
        // y = 2*x1 + 3*x2 + 1
        let x = DataMatrix::from_rows(&[
            &[1.0, 1.0],
            &[2.0, 1.0],
            &[1.0, 2.0],
            &[2.0, 2.0],
            &[3.0, 1.0],
            &[1.0, 3.0],
        ]);
        let y: Vec<f64> = (0..6)
            .map(|i| 2.0 * x.get(i, 0) + 3.0 * x.get(i, 1) + 1.0)
            .collect();
        (x, y)
    }

    #[test]
    fn test_linear_regression_metadata() {
        let kernel = LinearRegression::new();
        assert_eq!(kernel.metadata().id, "ml/linear-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_linear_regression_fit() {
        let (x, y) = create_linear_data();
        let result = LinearRegression::compute(&x, &y, true);

        // Should recover coefficients [2, 3] and intercept 1
        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.01,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.01,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );
        assert!(
            (result.intercept - 1.0).abs() < 0.01,
            "Expected intercept ≈ 1.0, got {}",
            result.intercept
        );

        // R² should be 1.0 for perfect linear data
        assert!(
            result.r2_score > 0.99,
            "Expected R² ≈ 1.0, got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regression_metadata() {
        let kernel = RidgeRegression::new();
        assert_eq!(kernel.metadata().id, "ml/ridge-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_ridge_regression_fit() {
        let (x, y) = create_linear_data();

        // With small regularization, should be close to OLS
        let result = RidgeRegression::compute(&x, &y, 0.001, true);

        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.1,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.1,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );

        assert!(
            result.r2_score > 0.95,
            "Expected high R², got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regularization_effect() {
        let (x, y) = create_linear_data();

        let result_low = RidgeRegression::compute(&x, &y, 0.001, true);
        let result_high = RidgeRegression::compute(&x, &y, 100.0, true);

        // High regularization should shrink coefficients
        let coef_norm_low: f64 = result_low
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let coef_norm_high: f64 = result_high
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            coef_norm_high < coef_norm_low,
            "Higher regularization should shrink coefficients: {} < {}",
            coef_norm_high,
            coef_norm_low
        );
    }

    #[test]
    fn test_linear_regression_empty() {
        let x = DataMatrix::zeros(0, 2);
        let y: Vec<f64> = Vec::new();
        let result = LinearRegression::compute(&x, &y, true);
        assert!(result.coefficients.is_empty());
    }
}