use crate::types::{DataMatrix, RegressionResult};
use rustkernel_core::{domain::Domain, kernel::KernelMetadata, traits::GpuKernel};

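/// Ordinary least squares (OLS) linear regression kernel.
///
/// Fits `y = X * b + intercept` by solving the normal equations
/// `X^T X b = X^T y` with a Cholesky factorization.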
#[derive(Debug, Clone)]
pub struct LinearRegression {
    metadata: KernelMetadata,
}

impl Default for LinearRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl LinearRegression {
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/linear-regression", Domain::StatisticalML)
                .with_description("OLS linear regression via normal equations")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

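    /// Fits an OLS model to `x` and `y` via the normal equations.
    ///
    /// Returns an empty result (no coefficients, zero intercept and R²) when
    /// the input is empty or `y` does not have one target per sample.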
    pub fn compute(x: &DataMatrix, y: &[f64], fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Optionally augment X with a leading column of ones for the intercept.
        let (x_aug, d_aug) = if fit_intercept {
            let mut aug_data = Vec::with_capacity(n * (d + 1));
            for i in 0..n {
                aug_data.push(1.0);
                aug_data.extend_from_slice(x.row(i));
            }
            (DataMatrix::new(aug_data, n, d + 1), d + 1)
        } else {
            (x.clone(), d)
        };

        // Normal equations: solve (X^T X) beta = X^T y.
        let xtx = Self::matrix_multiply_transpose_left(&x_aug);

        let xty = Self::matrix_vector_multiply_transpose(&x_aug, y);

        // Fall back to zero coefficients if the system is not solvable.
        let coefficients = match Self::solve_positive_definite(&xtx, &xty, d_aug) {
            Some(c) => c,
            None => vec![0.0; d_aug],
        };

        // Separate the intercept from the feature coefficients.
        let (intercept, coefs) = if fit_intercept {
            (coefficients[0], coefficients[1..].to_vec())
        } else {
            (0.0, coefficients)
        };

        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept + coefs.iter().zip(xi.iter()).map(|(c, x)| c * x).sum::<f64>()
            })
            .collect();

        let r2 = Self::compute_r2(y, &predictions);

        RegressionResult {
            coefficients: coefs,
            intercept,
            r2_score: r2,
        }
    }

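    /// Computes the Gram matrix `X^T X` as a dense row-major `d x d` buffer,
    /// filling only the lower triangle and mirroring it (the result is symmetric).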
    fn matrix_multiply_transpose_left(x: &DataMatrix) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d * d];

        for i in 0..d {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..n {
                    sum += x.get(k, i) * x.get(k, j);
                }
                result[i * d + j] = sum;
                result[j * d + i] = sum;
            }
        }

        result
    }

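    /// Computes the matrix-vector product `X^T y`.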
    #[allow(clippy::needless_range_loop)]
    fn matrix_vector_multiply_transpose(x: &DataMatrix, y: &[f64]) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d];

        for j in 0..d {
            for i in 0..n {
                result[j] += x.get(i, j) * y[i];
            }
        }

        result
    }

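    /// Solves the symmetric positive-definite system `A x = b` via Cholesky
    /// factorization (`A = L L^T`) followed by forward and back substitution.
    ///
    /// A tiny diagonal regularization (`1e-10`) is applied when a pivot is
    /// non-positive; returns `None` if the system is still not solvable.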
    fn solve_positive_definite(a: &[f64], b: &[f64], n: usize) -> Option<Vec<f64>> {
        // Cholesky factorization: A = L * L^T, with L stored row-major.
        let mut l = vec![0.0f64; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = a[i * n + j];
                for k in 0..j {
                    sum -= l[i * n + k] * l[j * n + k];
                }

                if i == j {
                    if sum <= 0.0 {
                        // Regularize slightly to tolerate near-singular systems.
                        let reg_sum = sum + 1e-10;
                        if reg_sum <= 0.0 {
                            return None;
                        }
                        l[i * n + j] = reg_sum.sqrt();
                    } else {
                        l[i * n + j] = sum.sqrt();
                    }
                } else {
                    l[i * n + j] = sum / l[j * n + j];
                }
            }
        }

        // Forward substitution: solve L * y = b.
        let mut y = vec![0.0f64; n];
        for i in 0..n {
            let mut sum = b[i];
            for j in 0..i {
                sum -= l[i * n + j] * y[j];
            }
            y[i] = sum / l[i * n + i];
        }

        // Back substitution: solve L^T * x = y.
        let mut x = vec![0.0f64; n];
        for i in (0..n).rev() {
            let mut sum = y[i];
            for j in (i + 1)..n {
                sum -= l[j * n + i] * x[j];
            }
            x[i] = sum / l[i * n + i];
        }

        Some(x)
    }

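    /// Computes the coefficient of determination (R²).
    ///
    /// Returns 0.0 for empty input, and 1.0 when both the total and residual
    /// sums of squares are zero (a constant target fit exactly).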
    fn compute_r2(y_true: &[f64], y_pred: &[f64]) -> f64 {
        let n = y_true.len();
        if n == 0 {
            return 0.0;
        }

        let y_mean: f64 = y_true.iter().sum::<f64>() / n as f64;

        let ss_tot: f64 = y_true.iter().map(|&y| (y - y_mean).powi(2)).sum();
        let ss_res: f64 = y_true
            .iter()
            .zip(y_pred.iter())
            .map(|(&yt, &yp)| (yt - yp).powi(2))
            .sum();

        if ss_tot == 0.0 {
            return if ss_res == 0.0 { 1.0 } else { 0.0 };
        }

        1.0 - ss_res / ss_tot
    }
}

impl GpuKernel for LinearRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

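/// Ridge regression kernel (OLS with an L2 penalty on the coefficients).
///
/// Solves `(X^T X + alpha * I) b = X^T y`; features and targets are centered
/// first when an intercept is requested, so the penalty does not affect it.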
#[derive(Debug, Clone)]
pub struct RidgeRegression {
    metadata: KernelMetadata,
}

impl Default for RidgeRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl RidgeRegression {
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/ridge-regression", Domain::StatisticalML)
                .with_description("Ridge regression with L2 regularization")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

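    /// Fits a ridge model to `x` and `y` with L2 regularization strength `alpha`.
    ///
    /// Returns an empty result when the input is empty or `y` does not have
    /// one target per sample.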
    #[allow(clippy::needless_range_loop)]
    pub fn compute(x: &DataMatrix, y: &[f64], alpha: f64, fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Center features and targets so the intercept is not penalized.
        let (x_centered, y_centered, x_mean, y_mean) = if fit_intercept {
            let x_mean: Vec<f64> = (0..d)
                .map(|j| (0..n).map(|i| x.get(i, j)).sum::<f64>() / n as f64)
                .collect();
            let y_mean: f64 = y.iter().sum::<f64>() / n as f64;

            let mut x_data = Vec::with_capacity(n * d);
            for i in 0..n {
                for j in 0..d {
                    x_data.push(x.get(i, j) - x_mean[j]);
                }
            }

            let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

            (DataMatrix::new(x_data, n, d), y_centered, x_mean, y_mean)
        } else {
            (x.clone(), y.to_vec(), vec![0.0; d], 0.0)
        };

        let mut xtx = LinearRegression::matrix_multiply_transpose_left(&x_centered);

        // Add the L2 penalty to the diagonal: X^T X + alpha * I.
        for i in 0..d {
            xtx[i * d + i] += alpha;
        }

        let xty = LinearRegression::matrix_vector_multiply_transpose(&x_centered, &y_centered);

        let coefficients = match LinearRegression::solve_positive_definite(&xtx, &xty, d) {
            Some(c) => c,
            None => vec![0.0; d],
        };

        // Recover the intercept from the feature and target means.
        let intercept = if fit_intercept {
            y_mean
                - coefficients
                    .iter()
                    .zip(x_mean.iter())
                    .map(|(c, m)| c * m)
                    .sum::<f64>()
        } else {
            0.0
        };

        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept
                    + coefficients
                        .iter()
                        .zip(xi.iter())
                        .map(|(c, x)| c * x)
                        .sum::<f64>()
            })
            .collect();

        let r2 = LinearRegression::compute_r2(y, &predictions);

        RegressionResult {
            coefficients,
            intercept,
            r2_score: r2,
        }
    }
}

impl GpuKernel for RidgeRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a small dataset where y = 2*x0 + 3*x1 + 1 exactly.
    fn create_linear_data() -> (DataMatrix, Vec<f64>) {
        let x = DataMatrix::from_rows(&[
            &[1.0, 1.0],
            &[2.0, 1.0],
            &[1.0, 2.0],
            &[2.0, 2.0],
            &[3.0, 1.0],
            &[1.0, 3.0],
        ]);
        let y: Vec<f64> = (0..6)
            .map(|i| 2.0 * x.get(i, 0) + 3.0 * x.get(i, 1) + 1.0)
            .collect();
        (x, y)
    }

    #[test]
    fn test_linear_regression_metadata() {
        let kernel = LinearRegression::new();
        assert_eq!(kernel.metadata().id, "ml/linear-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_linear_regression_fit() {
        let (x, y) = create_linear_data();
        let result = LinearRegression::compute(&x, &y, true);

        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.01,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.01,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );
        assert!(
            (result.intercept - 1.0).abs() < 0.01,
            "Expected intercept ≈ 1.0, got {}",
            result.intercept
        );

        assert!(
            result.r2_score > 0.99,
            "Expected R² ≈ 1.0, got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regression_metadata() {
        let kernel = RidgeRegression::new();
        assert_eq!(kernel.metadata().id, "ml/ridge-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_ridge_regression_fit() {
        let (x, y) = create_linear_data();

        // A small alpha should recover coefficients close to the OLS solution.
        let result = RidgeRegression::compute(&x, &y, 0.001, true);

        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.1,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.1,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );

        assert!(
            result.r2_score > 0.95,
            "Expected high R², got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regularization_effect() {
        let (x, y) = create_linear_data();

        let result_low = RidgeRegression::compute(&x, &y, 0.001, true);
        let result_high = RidgeRegression::compute(&x, &y, 100.0, true);

        // Compare the L2 norms of the fitted coefficient vectors.
        let coef_norm_low: f64 = result_low
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let coef_norm_high: f64 = result_high
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            coef_norm_high < coef_norm_low,
            "Higher regularization should shrink coefficients: {} < {}",
            coef_norm_high,
            coef_norm_low
        );
    }

    #[test]
    fn test_linear_regression_empty() {
        let x = DataMatrix::zeros(0, 2);
        let y: Vec<f64> = Vec::new();
        let result = LinearRegression::compute(&x, &y, true);
        assert!(result.coefficients.is_empty());
    }
}