35_linear_regression_gradient_descent/35_linear_regression_gradient_descent.rs
1//! # Example: Linear Regression by Gradient Descent
2//!
3//! Run: cargo run --example 35_linear_regression_gradient_descent
4//!
5//! ## Problem
6//! Fit a straight line `y = w*x + b` to a few points by minimizing mean-squared
7//! error, using batch gradient descent rather than a closed-form solution.
8//!
9//! ## Math idea
10//! Stack the data into a design matrix `X` (with a leading bias column) and a target
11//! vector `y`, so the model is `ŷ = X · θ` with `θ = [b, w]`. The MSE gradient is
12//! `(2/n) · Xᵀ · (ŷ - y)`, and each step takes `θ ← θ - lr · gradient`.
13//!
14//! ## Tensor representation
15//! `X` is a `[samples, 2]` `Tensor`; `θ` is length-2. Predictions are one
16//! `Tensor::matmul` (`[n, 2] × [2] -> [n]`) and the gradient is another
17//! (`Xᵀ` is `[2, n]`, so `[2, n] × [n] -> [2]`). The per-step update is plain Rust.
18//!
19//! ## What this demonstrates
20//! - matrix × vector multiplication via `Tensor::matmul`;
21//! - `Tensor::transpose` to form `Xᵀ` once and reuse it;
22//! - an iterative optimizer composing `Tensor` math with ordinary arithmetic.
23//!
24//! ## Expected output
25//! ```text
26//! fitted: y = 2.0000*x + 1.0000
27//! target: y = 2*x + 1
28//! Linear regression (gradient descent): OK
29//! ```
30
31use matten::Tensor;
32
33/// One batch gradient-descent step on the MSE of a linear model `ŷ = X · θ`.
34fn gd_step(x: &Tensor, xt: &Tensor, theta: &Tensor, y: &[f64], lr: f64) -> Tensor {
35 let n = y.len() as f64;
36 let pred = x.matmul(theta); // [n]
37 let residual: Vec<f64> = pred.as_slice().iter().zip(y).map(|(p, t)| p - t).collect();
38 let grad = xt.matmul(&Tensor::from_vec(residual)); // [features]
39 let updated: Vec<f64> = theta
40 .as_slice()
41 .iter()
42 .zip(grad.as_slice())
43 .map(|(w, g)| w - lr * (2.0 / n) * g)
44 .collect();
45 Tensor::from_vec(updated)
46}
47
48fn main() {
49 // Data generated from the true line y = 2x + 1.
50 // Design matrix X carries a leading bias column, so theta = [b, w].
51 let x = Tensor::new(
52 vec![
53 1.0, 0.0, //
54 1.0, 1.0, //
55 1.0, 2.0, //
56 1.0, 3.0, //
57 1.0, 4.0, //
58 ],
59 &[5, 2],
60 );
61 let y = [1.0, 3.0, 5.0, 7.0, 9.0];
62 let xt = x.transpose(); // [2, 5], formed once and reused each step
63
64 let mut theta = Tensor::from_vec(vec![0.0, 0.0]); // [b, w]
65 let lr = 0.05;
66 for _ in 0..2000 {
67 theta = gd_step(&x, &xt, &theta, &y, lr);
68 }
69
70 let p = theta.as_slice();
71 println!("fitted: y = {:.4}*x + {:.4}", p[1], p[0]);
72 println!("target: y = 2*x + 1");
73
74 assert!((p[0] - 1.0).abs() < 0.05, "intercept ~ 1");
75 assert!((p[1] - 2.0).abs() < 0.05, "slope ~ 2");
76 println!("Linear regression (gradient descent): OK");
77}