// rust_mlp — src/lib.rs
//! A small MLP (multi-layer perceptron) crate.
//!
//! `rust-mlp` is a small-core, from-scratch implementation of a dense feed-forward network.
//! It is designed to be easy to read while keeping the per-sample hot path allocation-free.
//!
//! # Design goals
//!
//! - Predictable performance: reuse buffers (`Scratch` / `Gradients`) instead of allocating.
//! - Clear contracts: shapes are explicit and validated at the API boundary.
//! - Practical training loop: `fit` supports mini-batches, shuffling, LR schedules, and common
//!   optimizers.
//!
//! # Panics vs `Result`
//!
//! This crate intentionally exposes two layers of API:
//!
//! - Low-level hot path (panics on misuse):
//!   - [`Mlp::forward`], [`Mlp::backward`]
//!   - [`Mlp::forward_batch`], [`Mlp::backward_batch`]
//!
//!   Shape mismatches are treated as programmer error and will panic via `assert!`.
//!
//! - High-level convenience APIs (shape-checked):
//!   - [`Mlp::fit`], [`Mlp::evaluate`]
//!   - [`Mlp::predict_into`]
//!
//!   These validate inputs and return [`Result`].
//!
//! # Data layout and shapes
//!
//! - Scalars are `f32`.
//! - [`Dataset`] and [`Inputs`] store samples contiguously in row-major layout.
//! - Layer weights are row-major with shape `(out_dim, in_dim)`.
//! - Batched inputs/outputs are passed as flat row-major buffers:
//!   - inputs: `(batch_size, input_dim)` as `batch_size * input_dim` scalars
//!   - outputs: `(batch_size, output_dim)` as `batch_size * output_dim` scalars
//!
//! # MSRV
//!
//! This crate's minimum supported Rust version (MSRV) is specified in `Cargo.toml`.
//!
//! See `ROADMAP.md` for the production-readiness plan.

//! # Quick start
//!
//! ```rust
//! use rust_mlp::{Activation, FitConfig, Loss, Metric, MlpBuilder};
//!
//! # fn main() -> rust_mlp::Result<()> {
//! let xs = vec![
//!     vec![0.0, 0.0],
//!     vec![0.0, 1.0],
//!     vec![1.0, 0.0],
//!     vec![1.0, 1.0],
//! ];
//! let ys = vec![vec![0.0], vec![1.0], vec![1.0], vec![0.0]];
//! let train = rust_mlp::Dataset::from_rows(&xs, &ys)?;
//!
//! let mut mlp = MlpBuilder::new(2)?
//!     .add_layer(8, Activation::ReLU)?
//!     .add_layer(1, Activation::Sigmoid)?
//!     .build_with_seed(0)?;
//!
//! let _report = mlp.fit(
//!     &train,
//!     None,
//!     FitConfig {
//!         epochs: 200,
//!         lr: 0.1,
//!         batch_size: 4,
//!         shuffle: rust_mlp::Shuffle::Seeded(0),
//!         lr_schedule: rust_mlp::LrSchedule::Constant,
//!         optimizer: rust_mlp::Optimizer::Adam {
//!             beta1: 0.9,
//!             beta2: 0.999,
//!             eps: 1e-8,
//!         },
//!         weight_decay: 0.0,
//!         grad_clip_norm: None,
//!         loss: Loss::Mse,
//!         metrics: vec![Metric::Accuracy],
//!     },
//! )?;
//! Ok(())
//! # }
//! ```

//! # Allocation-free training (advanced)
//!
//! If you want to drive training yourself (e.g. custom loop), allocate buffers once and reuse
//! them across steps:
//!
//! ```rust
//! use rust_mlp::{Activation, Loss, MlpBuilder};
//!
//! # fn main() -> rust_mlp::Result<()> {
//! let mut mlp = MlpBuilder::new(3)?
//!     .add_layer(8, Activation::Tanh)?
//!     .add_layer(2, Activation::Identity)?
//!     .build_with_seed(0)?;
//!
//! let mut trainer = mlp.trainer();
//! let x = [0.1_f32, -0.2, 0.3];
//! let t = [0.0_f32, 1.0];
//!
//! let y = mlp.forward(&x, &mut trainer.scratch);
//! let _loss = Loss::Mse.backward(y, &t, trainer.grads.d_output_mut());
//! mlp.backward(&x, &trainer.scratch, &mut trainer.grads);
//! mlp.sgd_step(&trainer.grads, 1e-2);
//! Ok(())
//! # }
//! ```
111
112pub mod activation;
113pub mod builder;
114pub mod data;
115pub mod error;
116pub mod layer;
117pub mod loss;
118pub(crate) mod matmul;
119pub mod metrics;
120pub mod mlp;
121pub mod optim;
122pub mod train;
123
124#[cfg(feature = "serde")]
125pub mod serde_model;
126
127pub use activation::Activation;
128pub use builder::MlpBuilder;
129pub use data::{Dataset, Inputs};
130pub use error::{Error, Result};
131pub use layer::{Init, Layer};
132pub use loss::Loss;
133pub use metrics::Metric;
134pub use mlp::Trainer;
135pub use mlp::{BatchBackpropScratch, BatchScratch, Gradients, Mlp, Scratch};
136pub use optim::{Optimizer, OptimizerState, Sgd};
137pub use train::Shuffle;
138pub use train::{EpochReport, EvalReport, FitConfig, FitReport, LrSchedule};
139
140/// Shape-safe, non-allocating inference.
141///
142/// Thin wrapper around [`Mlp::predict_into`].
143pub fn predict_into(
144    mlp: &Mlp,
145    input: &[f32],
146    scratch: &mut Scratch,
147    out: &mut [f32],
148) -> Result<()> {
149    mlp.predict_into(input, scratch, out)
150}