use crate::types::{DataMatrix, RegressionResult};
use rustkernel_core::{domain::Domain, kernel::KernelMetadata, traits::GpuKernel};

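/// Ordinary least squares (OLS) linear regression solved via the normal equations.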
#[derive(Debug, Clone)]
pub struct LinearRegression {
    metadata: KernelMetadata,
}

impl Default for LinearRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl LinearRegression {
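    /// Creates a new linear regression kernel with default metadata.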
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/linear-regression", Domain::StatisticalML)
                .with_description("OLS linear regression via normal equations")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

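    /// Fits an ordinary least squares model to `x` and `y` via the normal
    /// equations, optionally estimating an intercept, and returns the fitted
    /// coefficients, intercept, and R² score.
    ///
    /// Illustrative usage (a minimal sketch; `DataMatrix::from_rows` is the
    /// constructor used in the tests below):
    ///
    /// ```ignore
    /// let x = DataMatrix::from_rows(&[&[1.0, 1.0], &[2.0, 1.0], &[1.0, 2.0], &[2.0, 2.0]]);
    /// let y = vec![6.0, 8.0, 9.0, 11.0]; // y = 2·x0 + 3·x1 + 1
    /// let fit = LinearRegression::compute(&x, &y, true);
    /// assert!((fit.intercept - 1.0).abs() < 1e-6);
    /// ```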
    pub fn compute(x: &DataMatrix, y: &[f64], fit_intercept: bool) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Optionally prepend a column of ones so the intercept is estimated jointly.
        let (x_aug, d_aug) = if fit_intercept {
            let mut aug_data = Vec::with_capacity(n * (d + 1));
            for i in 0..n {
                aug_data.push(1.0);
                aug_data.extend_from_slice(x.row(i));
            }
            (DataMatrix::new(aug_data, n, d + 1), d + 1)
        } else {
            (x.clone(), d)
        };

        // Normal equations: solve (XᵀX) β = Xᵀy, falling back to zero
        // coefficients if the system cannot be solved.
        let xtx = Self::matrix_multiply_transpose_left(&x_aug);
        let xty = Self::matrix_vector_multiply_transpose(&x_aug, y);

        let coefficients = match Self::solve_positive_definite(&xtx, &xty, d_aug) {
            Some(c) => c,
            None => vec![0.0; d_aug],
        };

        // Separate the intercept from the feature coefficients.
        let (intercept, coefs) = if fit_intercept {
            (coefficients[0], coefficients[1..].to_vec())
        } else {
            (0.0, coefficients)
        };

        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept + coefs.iter().zip(xi.iter()).map(|(c, x)| c * x).sum::<f64>()
            })
            .collect();

        let r2 = Self::compute_r2(y, &predictions);

        RegressionResult {
            coefficients: coefs,
            intercept,
            r2_score: r2,
        }
    }

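    /// Computes the Gram matrix XᵀX as a row-major `d × d` buffer, filling both
    /// halves from the symmetric lower triangle.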
    fn matrix_multiply_transpose_left(x: &DataMatrix) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d * d];

        for i in 0..d {
            for j in 0..=i {
                let mut sum = 0.0;
                for k in 0..n {
                    sum += x.get(k, i) * x.get(k, j);
                }
                result[i * d + j] = sum;
                result[j * d + i] = sum;
            }
        }

        result
    }

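    /// Computes Xᵀy as a length-`d` vector.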
    fn matrix_vector_multiply_transpose(x: &DataMatrix, y: &[f64]) -> Vec<f64> {
        let n = x.n_samples;
        let d = x.n_features;
        let mut result = vec![0.0f64; d];

        for j in 0..d {
            for i in 0..n {
                result[j] += x.get(i, j) * y[i];
            }
        }

        result
    }

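    /// Solves `a · x = b` for a symmetric positive definite matrix `a`
    /// (row-major, `n × n`) using a Cholesky factorization followed by forward
    /// and back substitution. Returns `None` if the matrix is not positive
    /// definite even after a small diagonal regularization.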
    fn solve_positive_definite(a: &[f64], b: &[f64], n: usize) -> Option<Vec<f64>> {
        // Cholesky factorization: a = L·Lᵀ, with L stored row-major.
        let mut l = vec![0.0f64; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = a[i * n + j];
                for k in 0..j {
                    sum -= l[i * n + k] * l[j * n + k];
                }

                if i == j {
                    if sum <= 0.0 {
                        // Non-positive pivot: try a tiny regularization before giving up.
                        let reg_sum = sum + 1e-10;
                        if reg_sum <= 0.0 {
                            return None;
                        }
                        l[i * n + j] = reg_sum.sqrt();
                    } else {
                        l[i * n + j] = sum.sqrt();
                    }
                } else {
                    l[i * n + j] = sum / l[j * n + j];
                }
            }
        }

        // Forward substitution: solve L·y = b.
        let mut y = vec![0.0f64; n];
        for i in 0..n {
            let mut sum = b[i];
            for j in 0..i {
                sum -= l[i * n + j] * y[j];
            }
            y[i] = sum / l[i * n + i];
        }

        // Back substitution: solve Lᵀ·x = y.
        let mut x = vec![0.0f64; n];
        for i in (0..n).rev() {
            let mut sum = y[i];
            for j in (i + 1)..n {
                sum -= l[j * n + i] * x[j];
            }
            x[i] = sum / l[i * n + i];
        }

        Some(x)
    }

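    /// Computes the coefficient of determination R² = 1 − SS_res / SS_tot,
    /// returning 1.0 for a perfect fit of constant targets and 0.0 when only the
    /// total sum of squares is zero.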
    fn compute_r2(y_true: &[f64], y_pred: &[f64]) -> f64 {
        let n = y_true.len();
        if n == 0 {
            return 0.0;
        }

        let y_mean: f64 = y_true.iter().sum::<f64>() / n as f64;

        let ss_tot: f64 = y_true.iter().map(|&y| (y - y_mean).powi(2)).sum();
        let ss_res: f64 = y_true
            .iter()
            .zip(y_pred.iter())
            .map(|(&yt, &yp)| (yt - yp).powi(2))
            .sum();

        if ss_tot == 0.0 {
            return if ss_res == 0.0 { 1.0 } else { 0.0 };
        }

        1.0 - ss_res / ss_tot
    }
}

impl GpuKernel for LinearRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

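/// Ridge regression (L2-regularized least squares) solved via the normal
/// equations; when fitting an intercept, features and targets are centered
/// first so the penalty does not apply to the intercept.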
#[derive(Debug, Clone)]
pub struct RidgeRegression {
    metadata: KernelMetadata,
}

impl Default for RidgeRegression {
    fn default() -> Self {
        Self::new()
    }
}

impl RidgeRegression {
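    /// Creates a new ridge regression kernel with default metadata.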
    #[must_use]
    pub fn new() -> Self {
        Self {
            metadata: KernelMetadata::batch("ml/ridge-regression", Domain::StatisticalML)
                .with_description("Ridge regression with L2 regularization")
                .with_throughput(50_000)
                .with_latency_us(20.0),
        }
    }

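    /// Fits ridge regression by solving `(XᵀX + αI) β = Xᵀy` with regularization
    /// strength `alpha`. With `fit_intercept`, `x` and `y` are centered first and
    /// the intercept is recovered from the feature and target means; the R² score
    /// is reported on the original data.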
    pub fn compute(
        x: &DataMatrix,
        y: &[f64],
        alpha: f64,
        fit_intercept: bool,
    ) -> RegressionResult {
        let n = x.n_samples;
        let d = x.n_features;

        if n == 0 || d == 0 || y.len() != n {
            return RegressionResult {
                coefficients: Vec::new(),
                intercept: 0.0,
                r2_score: 0.0,
            };
        }

        // Center features and targets so the L2 penalty does not affect the intercept.
        let (x_centered, y_centered, x_mean, y_mean) = if fit_intercept {
            let x_mean: Vec<f64> = (0..d)
                .map(|j| (0..n).map(|i| x.get(i, j)).sum::<f64>() / n as f64)
                .collect();
            let y_mean: f64 = y.iter().sum::<f64>() / n as f64;

            let mut x_data = Vec::with_capacity(n * d);
            for i in 0..n {
                for j in 0..d {
                    x_data.push(x.get(i, j) - x_mean[j]);
                }
            }

            let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

            (DataMatrix::new(x_data, n, d), y_centered, x_mean, y_mean)
        } else {
            (x.clone(), y.to_vec(), vec![0.0; d], 0.0)
        };

        let mut xtx = LinearRegression::matrix_multiply_transpose_left(&x_centered);

        // Add the ridge penalty to the diagonal of XᵀX.
        for i in 0..d {
            xtx[i * d + i] += alpha;
        }

        let xty = LinearRegression::matrix_vector_multiply_transpose(&x_centered, &y_centered);

        let coefficients = match LinearRegression::solve_positive_definite(&xtx, &xty, d) {
            Some(c) => c,
            None => vec![0.0; d],
        };

        // Recover the intercept from the feature and target means.
        let intercept = if fit_intercept {
            y_mean
                - coefficients
                    .iter()
                    .zip(x_mean.iter())
                    .map(|(c, m)| c * m)
                    .sum::<f64>()
        } else {
            0.0
        };

        let predictions: Vec<f64> = (0..n)
            .map(|i| {
                let xi = x.row(i);
                intercept
                    + coefficients
                        .iter()
                        .zip(xi.iter())
                        .map(|(c, x)| c * x)
                        .sum::<f64>()
            })
            .collect();

        let r2 = LinearRegression::compute_r2(y, &predictions);

        RegressionResult {
            coefficients,
            intercept,
            r2_score: r2,
        }
    }
}

impl GpuKernel for RidgeRegression {
    fn metadata(&self) -> &KernelMetadata {
        &self.metadata
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Six two-feature samples with targets generated as y = 2·x0 + 3·x1 + 1.
    fn create_linear_data() -> (DataMatrix, Vec<f64>) {
        let x = DataMatrix::from_rows(&[
            &[1.0, 1.0],
            &[2.0, 1.0],
            &[1.0, 2.0],
            &[2.0, 2.0],
            &[3.0, 1.0],
            &[1.0, 3.0],
        ]);
        let y: Vec<f64> = (0..6)
            .map(|i| 2.0 * x.get(i, 0) + 3.0 * x.get(i, 1) + 1.0)
            .collect();
        (x, y)
    }

    #[test]
    fn test_linear_regression_metadata() {
        let kernel = LinearRegression::new();
        assert_eq!(kernel.metadata().id, "ml/linear-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_linear_regression_fit() {
        let (x, y) = create_linear_data();
        let result = LinearRegression::compute(&x, &y, true);

        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.01,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.01,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );
        assert!(
            (result.intercept - 1.0).abs() < 0.01,
            "Expected intercept ≈ 1.0, got {}",
            result.intercept
        );

        assert!(
            result.r2_score > 0.99,
            "Expected R² ≈ 1.0, got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regression_metadata() {
        let kernel = RidgeRegression::new();
        assert_eq!(kernel.metadata().id, "ml/ridge-regression");
        assert_eq!(kernel.metadata().domain, Domain::StatisticalML);
    }

    #[test]
    fn test_ridge_regression_fit() {
        let (x, y) = create_linear_data();

        let result = RidgeRegression::compute(&x, &y, 0.001, true);

        assert!(
            (result.coefficients[0] - 2.0).abs() < 0.1,
            "Expected coef[0] ≈ 2.0, got {}",
            result.coefficients[0]
        );
        assert!(
            (result.coefficients[1] - 3.0).abs() < 0.1,
            "Expected coef[1] ≈ 3.0, got {}",
            result.coefficients[1]
        );

        assert!(
            result.r2_score > 0.95,
            "Expected high R², got {}",
            result.r2_score
        );
    }

    #[test]
    fn test_ridge_regularization_effect() {
        let (x, y) = create_linear_data();

        let result_low = RidgeRegression::compute(&x, &y, 0.001, true);
        let result_high = RidgeRegression::compute(&x, &y, 100.0, true);

        let coef_norm_low: f64 = result_low
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();
        let coef_norm_high: f64 = result_high
            .coefficients
            .iter()
            .map(|c| c.powi(2))
            .sum::<f64>()
            .sqrt();

        assert!(
            coef_norm_high < coef_norm_low,
            "Higher regularization should shrink coefficients: {} < {}",
            coef_norm_high,
            coef_norm_low
        );
    }

    #[test]
    fn test_linear_regression_empty() {
        let x = DataMatrix::zeros(0, 2);
        let y: Vec<f64> = Vec::new();
        let result = LinearRegression::compute(&x, &y, true);
        assert!(result.coefficients.is_empty());
    }
}