Skip to main content

35_linear_regression_gradient_descent/
35_linear_regression_gradient_descent.rs

1//! # Example: Linear Regression by Gradient Descent
2//!
3//! Run: cargo run --example 35_linear_regression_gradient_descent
4//!
5//! ## Problem
6//! Fit a straight line `y = w*x + b` to a few points by minimizing mean-squared
7//! error, using batch gradient descent rather than a closed-form solution.
8//!
9//! ## Math idea
10//! Stack the data into a design matrix `X` (with a leading bias column) and a target
11//! vector `y`, so the model is `ŷ = X · θ` with `θ = [b, w]`. The MSE gradient is
12//! `(2/n) · Xᵀ · (ŷ - y)`, and each step takes `θ ← θ - lr · gradient`.
13//!
14//! ## Tensor representation
15//! `X` is a `[samples, 2]` `Tensor`; `θ` is length-2. Predictions are one
16//! `Tensor::matmul` (`[n, 2] × [2] -> [n]`) and the gradient is another
17//! (`Xᵀ` is `[2, n]`, so `[2, n] × [n] -> [2]`). The per-step update is plain Rust.
18//!
19//! ## What this demonstrates
20//! - matrix × vector multiplication via `Tensor::matmul`;
21//! - `Tensor::transpose` to form `Xᵀ` once and reuse it;
22//! - an iterative optimizer composing `Tensor` math with ordinary arithmetic.
23//!
24//! ## Expected output
25//! ```text
26//! fitted: y = 2.0000*x + 1.0000
27//! target: y = 2*x + 1
28//! Linear regression (gradient descent): OK
29//! ```
30
31use matten::Tensor;
32
33/// One batch gradient-descent step on the MSE of a linear model `ŷ = X · θ`.
34fn gd_step(x: &Tensor, xt: &Tensor, theta: &Tensor, y: &[f64], lr: f64) -> Tensor {
35    let n = y.len() as f64;
36    let pred = x.matmul(theta); // [n]
37    let residual: Vec<f64> = pred.as_slice().iter().zip(y).map(|(p, t)| p - t).collect();
38    let grad = xt.matmul(&Tensor::from_vec(residual)); // [features]
39    let updated: Vec<f64> = theta
40        .as_slice()
41        .iter()
42        .zip(grad.as_slice())
43        .map(|(w, g)| w - lr * (2.0 / n) * g)
44        .collect();
45    Tensor::from_vec(updated)
46}
47
48fn main() {
49    // Data generated from the true line y = 2x + 1.
50    // Design matrix X carries a leading bias column, so theta = [b, w].
51    let x = Tensor::new(
52        vec![
53            1.0, 0.0, //
54            1.0, 1.0, //
55            1.0, 2.0, //
56            1.0, 3.0, //
57            1.0, 4.0, //
58        ],
59        &[5, 2],
60    );
61    let y = [1.0, 3.0, 5.0, 7.0, 9.0];
62    let xt = x.transpose(); // [2, 5], formed once and reused each step
63
64    let mut theta = Tensor::from_vec(vec![0.0, 0.0]); // [b, w]
65    let lr = 0.05;
66    for _ in 0..2000 {
67        theta = gd_step(&x, &xt, &theta, &y, lr);
68    }
69
70    let p = theta.as_slice();
71    println!("fitted: y = {:.4}*x + {:.4}", p[1], p[0]);
72    println!("target: y = 2*x + 1");
73
74    assert!((p[0] - 1.0).abs() < 0.05, "intercept ~ 1");
75    assert!((p[1] - 2.0).abs() < 0.05, "slope ~ 2");
76    println!("Linear regression (gradient descent): OK");
77}