rustkernel_ml/regression.rs

//! Regression kernels.
//!
//! This module provides regression algorithms:
//! - Linear regression (OLS via normal equations)
//! - Ridge regression (L2 regularization)

use crate::types::{DataMatrix, RegressionResult};
use rustkernel_core::{domain::Domain, kernel::KernelMetadata, traits::GpuKernel};

// ============================================================================
// Linear Regression Kernel
// ============================================================================

/// Linear regression kernel.
///
/// Ordinary Least Squares regression using the normal equations:
/// β = (X^T X)^(-1) X^T y
#[derive(Debug, Clone)]
pub struct LinearRegression {
    metadata: KernelMetadata,
}

impl Default for LinearRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl LinearRegression {
    /// Create a new linear regression kernel.
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/linear-regression", Domain::StatisticalML)
                .with_description("OLS linear regression via normal equations")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

    /// Fit a linear regression model.
    ///
    /// # Arguments
    /// * `x` - Feature matrix (n_samples x n_features)
    /// * `y` - Target vector (n_samples)
    /// * `fit_intercept` - Whether to fit an intercept term
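    ///
    /// # Example
    ///
    /// A minimal usage sketch (not run as a doctest); `DataMatrix::from_rows`
    /// is the row-wise constructor used in the tests below:
    ///
    /// ```ignore
    /// // Three points on y = 2*x + 1 are recovered exactly.
    /// let x = DataMatrix::from_rows(&[&[1.0], &[2.0], &[3.0]]);
    /// let y = vec![3.0, 5.0, 7.0];
    /// let fit = LinearRegression::compute(&x, &y, true);
    /// assert!((fit.coefficients[0] - 2.0).abs() < 1e-6);
    /// assert!((fit.intercept - 1.0).abs() < 1e-6);
    /// ```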
    pub fn compute(x: &DataMatrix, y: &[f64], fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Optionally add intercept column
        let (x_aug, d_aug) = if fit_intercept {
            let mut aug_data = Vec::with_capacity(n * (d + 1));
            for i in 0..n {
                aug_data.push(1.0); // Intercept column
                aug_data.extend_from_slice(x.row(i));
            }
            (DataMatrix::new(aug_data, n, d + 1), d + 1)
        } else {
            (x.clone(), d)
        };

        // Compute X^T X
        let xtx = Self::matrix_multiply_transpose_left(&x_aug);

        // Compute X^T y
        let xty = Self::matrix_vector_multiply_transpose(&x_aug, y);

        // Solve (X^T X) β = X^T y using Cholesky decomposition
        let coefficients = match Self::solve_positive_definite(&xtx, &xty, d_aug) {
            Some(c) => c,
            None => vec![0.0; d_aug], // Fall back to zeros if singular
        };

        // Extract intercept if fitted
        let (intercept, coefs) = if fit_intercept {
            (coefficients[0], coefficients[1..].to_vec())
        } else {
            (0.0, coefficients)
        };

        // Compute predictions and R²
        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept + coefs.iter().zip(xi.iter()).map(|(c, x)| c * x).sum::<f64>()
            })
            .collect();

        let r2 = Self::compute_r2(y, &predictions);

        RegressionResult {
            coefficients: coefs,
            intercept,
            r2_score: r2,
        }
    }

    /// Compute X^T X (symmetric positive semi-definite).
    fn matrix_multiply_transpose_left(x: &DataMatrix) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d * d];

        for i in 0..d {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..n {
                    sum += x.get(k, i) * x.get(k, j);
                }
                result[i * d + j] = sum;
                result[j * d + i] = sum; // Symmetric
            }
        }

        result
    }

    /// Compute X^T y.
    fn matrix_vector_multiply_transpose(x: &DataMatrix, y: &[f64]) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d];

        for j in 0..d {
            for i in 0..n {
                result[j] += x.get(i, j) * y[i];
            }
        }

        result
    }

    /// Solve Ax = b for positive definite A using Cholesky decomposition.
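    ///
    /// Factors A = L·L^T, then solves L·y = b by forward substitution and
    /// L^T·x = y by backward substitution. Returns `None` if a diagonal pivot
    /// remains non-positive even after a small regularization term (1e-10)
    /// is added.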
    fn solve_positive_definite(a: &[f64], b: &[f64], n: usize) -> Option<Vec<f64>> {
        // Cholesky decomposition: A = L L^T
        let mut l = vec![0.0f64; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = a[i * n + j];
                for k in 0..j {
                    sum -= l[i * n + k] * l[j * n + k];
                }

                if i == j {
                    if sum <= 0.0 {
                        // Add regularization for numerical stability
                        let reg_sum = sum + 1e-10;
                        if reg_sum <= 0.0 {
                            return None;
                        }
                        l[i * n + j] = reg_sum.sqrt();
                    } else {
                        l[i * n + j] = sum.sqrt();
                    }
                } else {
                    l[i * n + j] = sum / l[j * n + j];
                }
            }
        }

        // Forward substitution: L y = b
        let mut y = vec![0.0f64; n];
        for i in 0..n {
            let mut sum = b[i];
            for j in 0..i {
                sum -= l[i * n + j] * y[j];
            }
            y[i] = sum / l[i * n + i];
        }

        // Backward substitution: L^T x = y
        let mut x = vec![0.0f64; n];
        for i in (0..n).rev() {
            let mut sum = y[i];
            for j in (i + 1)..n {
                sum -= l[j * n + i] * x[j]; // L^T[i][j] = L[j][i]
            }
            x[i] = sum / l[i * n + i];
        }

        Some(x)
    }

    /// Compute R² score.
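    ///
    /// R² = 1 - SS_res / SS_tot, with SS_res = Σ(y_i - ŷ_i)² and
    /// SS_tot = Σ(y_i - ȳ)². Returns 0.0 for empty input; when SS_tot is
    /// zero, returns 1.0 if SS_res is also zero and 0.0 otherwise.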
    fn compute_r2(y_true: &[f64], y_pred: &[f64]) -> f64 {
        let n = y_true.len();
        if n == 0 {
            return 0.0;
        }

        let y_mean: f64 = y_true.iter().sum::<f64>() / n as f64;

        let ss_tot: f64 = y_true.iter().map(|&y| (y - y_mean).powi(2)).sum();
        let ss_res: f64 = y_true
            .iter()
            .zip(y_pred.iter())
            .map(|(&yt, &yp)| (yt - yp).powi(2))
            .sum();

        if ss_tot == 0.0 {
            return if ss_res == 0.0 { 1.0 } else { 0.0 };
        }

        1.0 - ss_res / ss_tot
    }
}

impl GpuKernel for LinearRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

// ============================================================================
// Ridge Regression Kernel
// ============================================================================

/// Ridge regression kernel.
///
/// Linear regression with L2 regularization:
/// β = (X^T X + αI)^(-1) X^T y
#[derive(Debug, Clone)]
pub struct RidgeRegression {
    metadata: KernelMetadata,
}

impl Default for RidgeRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl RidgeRegression {
    /// Create a new ridge regression kernel.
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/ridge-regression", Domain::StatisticalML)
                .with_description("Ridge regression with L2 regularization")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

    /// Fit a ridge regression model.
    ///
    /// # Arguments
    /// * `x` - Feature matrix (n_samples x n_features)
    /// * `y` - Target vector (n_samples)
    /// * `alpha` - Regularization strength
    /// * `fit_intercept` - Whether to fit an intercept term
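    ///
    /// # Example
    ///
    /// A minimal usage sketch (not run as a doctest); `DataMatrix::from_rows`
    /// is the row-wise constructor used in the tests below:
    ///
    /// ```ignore
    /// // y = 2*x exactly; the L2 penalty pulls the slope below the OLS value of 2.0.
    /// let x = DataMatrix::from_rows(&[&[1.0], &[2.0], &[3.0], &[4.0]]);
    /// let y = vec![2.0, 4.0, 6.0, 8.0];
    /// let fit = RidgeRegression::compute(&x, &y, 1.0, true);
    /// assert!(fit.coefficients[0] > 0.0 && fit.coefficients[0] < 2.0);
    /// ```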
    pub fn compute(x: &DataMatrix, y: &[f64], alpha: f64, fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Center data if fitting intercept (standard approach for Ridge,
        // so the L2 penalty does not shrink the intercept)
        let (x_centered, y_centered, x_mean, y_mean) = if fit_intercept {
            let x_mean: Vec<f64> = (0..d)
                .map(|j| (0..n).map(|i| x.get(i, j)).sum::<f64>() / n as f64)
                .collect();
            let y_mean: f64 = y.iter().sum::<f64>() / n as f64;

            let mut x_data = Vec::with_capacity(n * d);
            for i in 0..n {
                for j in 0..d {
                    x_data.push(x.get(i, j) - x_mean[j]);
                }
            }

            let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

            (DataMatrix::new(x_data, n, d), y_centered, x_mean, y_mean)
        } else {
            (x.clone(), y.to_vec(), vec![0.0; d], 0.0)
        };

        // Compute X^T X + αI
        let mut xtx = LinearRegression::matrix_multiply_transpose_left(&x_centered);

        // Add regularization to diagonal
        for i in 0..d {
            xtx[i * d + i] += alpha;
        }

        // Compute X^T y
        let xty = LinearRegression::matrix_vector_multiply_transpose(&x_centered, &y_centered);

        // Solve (X^T X + αI) β = X^T y
        let coefficients = match LinearRegression::solve_positive_definite(&xtx, &xty, d) {
            Some(c) => c,
            None => vec![0.0; d],
        };

        // Compute intercept: b = y_mean - Σ(β_j * x_mean_j)
        let intercept = if fit_intercept {
            y_mean
                - coefficients
                    .iter()
                    .zip(x_mean.iter())
                    .map(|(c, m)| c * m)
                    .sum::<f64>()
        } else {
            0.0
        };

        // Compute predictions and R²
        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept
                    + coefficients
                        .iter()
                        .zip(xi.iter())
                        .map(|(c, x)| c * x)
                        .sum::<f64>()
            })
            .collect();

        let r2 = LinearRegression::compute_r2(y, &predictions);

        RegressionResult {
            coefficients,
            intercept,
            r2_score: r2,
        }
    }
}

impl GpuKernel for RidgeRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn create_linear_data() -> (DataMatrix, Vec<f64>) {
        // y = 2*x1 + 3*x2 + 1
        let x = DataMatrix::from_rows(&[
            &[1.0, 1.0],
            &[2.0, 1.0],
            &[1.0, 2.0],
            &[2.0, 2.0],
            &[3.0, 1.0],
            &[1.0, 3.0],
        ]);
        let y: Vec<f64> = (0..6)
            .map(|i| 2.0 * x.get(i, 0) + 3.0 * x.get(i, 1) + 1.0)
            .collect();
        (x, y)
    }

    #[test]
    fn test_linear_regression_metadata() {
        let kernel = LinearRegression::new();
        assert_eq!(kernel.metadata().id, "ml/linear-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_linear_regression_fit() {
        let (x, y) = create_linear_data();
        let result = LinearRegression::compute(&x, &y, true);

        // Should recover coefficients [2, 3] and intercept 1
        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.01,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.01,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );
        assert!(
            (result.intercept - 1.0).abs() < 0.01,
            "Expected intercept ≈ 1.0, got {}",
            result.intercept
        );

        // R² should be 1.0 for perfect linear data
        assert!(
            result.r2_score > 0.99,
            "Expected R² ≈ 1.0, got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regression_metadata() {
        let kernel = RidgeRegression::new();
        assert_eq!(kernel.metadata().id, "ml/ridge-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_ridge_regression_fit() {
        let (x, y) = create_linear_data();

        // With small regularization, should be close to OLS
        let result = RidgeRegression::compute(&x, &y, 0.001, true);

        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.1,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.1,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );

        assert!(
            result.r2_score > 0.95,
            "Expected high R², got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regularization_effect() {
        let (x, y) = create_linear_data();

        let result_low = RidgeRegression::compute(&x, &y, 0.001, true);
        let result_high = RidgeRegression::compute(&x, &y, 100.0, true);

        // High regularization should shrink coefficients
        let coef_norm_low: f64 = result_low
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let coef_norm_high: f64 = result_high
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            coef_norm_high < coef_norm_low,
            "Higher regularization should shrink coefficients: {} < {}",
            coef_norm_high,
            coef_norm_low
        );
    }

    #[test]
    fn test_linear_regression_empty() {
        let x = DataMatrix::zeros(0, 2);
        let y: Vec<f64> = Vec::new();
        let result = LinearRegression::compute(&x, &y, true);
        assert!(result.coefficients.is_empty());
    }
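
    // Illustrative extra check of the private Cholesky solver against a
    // hand-computed 2x2 system: 4x + 2y = 10, 2x + 3y = 8 has the exact
    // solution x = 1.75, y = 1.5.
    #[test]
    fn test_solve_positive_definite_2x2() {
        let a = vec![4.0, 2.0, 2.0, 3.0]; // Row-major symmetric positive definite matrix
        let b = vec![10.0, 8.0];
        let x = LinearRegression::solve_positive_definite(&a, &b, 2)
            .expect("matrix is positive definite");
        assert!((x[0] - 1.75).abs() < 1e-9, "Expected x[0] ≈ 1.75, got {}", x[0]);
        assert!((x[1] - 1.5).abs() < 1e-9, "Expected x[1] ≈ 1.5, got {}", x[1]);
    }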
}