linreg_core/lib.rs
1//! # linreg-core
2//!
3//! A lightweight, self-contained linear regression library in pure Rust.
4//!
5//! **No external math dependencies.** All linear algebra (matrices, QR decomposition)
6//! and statistical functions (distributions, hypothesis tests) are implemented from
7//! scratch. Compiles to WebAssembly for browser use, exposes Python bindings via PyO3,
8//! or runs as a native Rust crate.
9//!
10//! **[Live Demo →](https://jesse-anderson.net/linreg-core/)**
11//!
12//! ## What This Does
13//!
14//! - **OLS Regression** — Ordinary Least Squares with numerically stable QR decomposition
15//! - **Regularized Regression** — Ridge, Lasso, and Elastic Net via coordinate descent
16//! - **WLS Regression** — Weighted Least Squares for heteroscedastic data
17//! - **LOESS** — Non-parametric locally weighted smoothing
18//! - **K-Fold Cross Validation** — Model evaluation for all regression types
19//! - **Prediction Intervals** — Point and interval predictions for all model types
20//! - **Diagnostic Tests** — 14 statistical tests for validating regression assumptions
21//! - **Model Serialization** — Save/load trained models to JSON
22//! - **WASM Support** — Same API works in browsers via WebAssembly
23//! - **Python Bindings** — PyO3 bindings available via `pip install linreg-core`
24//!
25//! ## Quick Start
26//!
27//! ### Native Rust
28//!
29//! Add to `Cargo.toml` (no WASM overhead):
30//!
31//! ```toml
32//! [dependencies]
33//! linreg-core = { version = "0.6", default-features = false }
34//! ```
35//!
36//! ```rust
37//! use linreg_core::core::ols_regression;
38//!
39//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
40//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
41//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
42//! let names = vec!["Intercept".into(), "Temp".into(), "Pressure".into()];
43//!
44//! let result = ols_regression(&y, &[x1, x2], &names)?;
45//! println!("R²: {}", result.r_squared);
46//! println!("F-statistic: {}", result.f_statistic);
47//! println!("AIC: {}", result.aic);
48//! # Ok::<(), linreg_core::Error>(())
49//! ```
50//!
51//! ### WebAssembly (JavaScript)
52//!
53//! ```toml
54//! [dependencies]
55//! linreg-core = "0.6"
56//! ```
57//!
58//! Build with `wasm-pack build --target web`, then use in JavaScript:
59//!
60//! ```text
61//! import init, { ols_regression } from './linreg_core.js';
62//! await init();
63//!
64//! const result = JSON.parse(ols_regression(
65//! JSON.stringify([2.5, 3.7, 4.2, 5.1, 6.3]),
66//! JSON.stringify([[1,2,3,4,5], [2,4,5,4,3]]),
67//! JSON.stringify(["Intercept", "X1", "X2"])
68//! ));
69//! console.log("R²:", result.r_squared);
70//! ```
71//!
72//! ## Regularized Regression
73//!
74//! ```no_run
75//! use linreg_core::regularized::{ridge_fit, RidgeFitOptions, lasso_fit, LassoFitOptions};
76//! use linreg_core::linalg::Matrix;
77//!
78//! let x = Matrix::new(100, 3, vec![0.0; 300]);
79//! let y = vec![0.0; 100];
80//!
81//! // Ridge regression (L2 penalty — shrinks coefficients, handles multicollinearity)
82//! let ridge_result = ridge_fit(&x, &y, &RidgeFitOptions {
83//! lambda: 1.0,
84//! intercept: true,
85//! standardize: true,
86//! ..Default::default()
87//! })?;
88//!
89//! // Lasso regression (L1 penalty — automatic variable selection by zeroing coefficients)
90//! let lasso_result = lasso_fit(&x, &y, &LassoFitOptions {
91//! lambda: 0.1,
92//! intercept: true,
93//! standardize: true,
94//! ..Default::default()
95//! })?;
96//! # Ok::<(), linreg_core::Error>(())
97//! ```
98//!
99//! ## WLS and LOESS
100//!
101//! ```no_run
102//! use linreg_core::weighted_regression::wls_regression;
103//! use linreg_core::loess::{loess_fit, LoessOptions};
104//!
105//! // Weighted Least Squares — down-weight high-variance observations
106//! let weights = vec![1.0, 2.0, 1.0, 2.0, 1.0];
107//! let wls = wls_regression(
108//! &[2.5, 3.7, 4.2, 5.1, 6.3],
109//! &[vec![1.0, 2.0, 3.0, 4.0, 5.0]],
110//! &weights,
111//! )?;
112//!
113//! // LOESS — non-parametric smoothing (single predictor)
114//! let x = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
115//! let y = vec![1.0, 2.1, 3.9, 8.2, 16.5, 32.1];
116//! let loess = loess_fit(&y, &[x], &LoessOptions::default())?;
117//! # Ok::<(), linreg_core::Error>(())
118//! ```
119//!
120//! ## K-Fold Cross Validation
121//!
122//! ```no_run
123//! use linreg_core::cross_validation::{kfold_cv_ols, KFoldOptions};
124//!
125//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3, 7.0, 7.5, 8.1];
126//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
127//! let names = vec!["Intercept".to_string(), "X1".to_string()];
128//!
129//! let cv = kfold_cv_ols(&y, &[x1], &names, &KFoldOptions {
130//! n_folds: 5,
131//! shuffle: true,
132//! seed: Some(42),
133//! })?;
134//! println!("CV RMSE: {:.4} ± {:.4}", cv.mean_rmse, cv.std_rmse);
135//! println!("CV R²: {:.4} ± {:.4}", cv.mean_r_squared, cv.std_r_squared);
136//! # Ok::<(), linreg_core::Error>(())
137//! ```
138//!
139//! ## Diagnostic Tests
140//!
141//! After fitting a model, validate its assumptions:
142//!
143//! | Test | Tests For | Use When |
144//! |------|-----------|----------|
145//! | [`diagnostics::rainbow_test`] | Linearity | Checking if relationships are linear |
146//! | [`diagnostics::harvey_collier_test`] | Functional form | Suspecting model misspecification |
147//! | [`diagnostics::reset_test`] | Specification error | Detecting omitted variables or wrong functional form |
148//! | [`diagnostics::breusch_pagan_test`] | Heteroscedasticity | Variance changes with predictors |
149//! | [`diagnostics::white_test`] | Heteroscedasticity | More general than Breusch-Pagan |
150//! | [`diagnostics::shapiro_wilk_test`] | Normality | Small to moderate samples (n ≤ 5000) |
151//! | [`diagnostics::jarque_bera_test`] | Normality | Large samples, skewness/kurtosis |
152//! | [`diagnostics::anderson_darling_test`] | Normality | Tail-sensitive, any sample size |
153//! | [`diagnostics::durbin_watson_test`] | Autocorrelation | Time series or ordered data |
154//! | [`diagnostics::breusch_godfrey_test`] | Higher-order autocorrelation | Detecting serial correlation at multiple lags |
155//! | [`diagnostics::cooks_distance_test`] | Influential points | Identifying high-impact observations |
156//! | [`diagnostics::dfbetas_test`] | Coefficient influence | Which observations drive each coefficient |
157//! | [`diagnostics::dffits_test`] | Fitted value influence | Influence of each observation on its own prediction |
158//! | [`diagnostics::vif_test`] | Multicollinearity | Detecting highly correlated predictors |
159//!
160//! ```rust
161//! use linreg_core::diagnostics::{rainbow_test, breusch_pagan_test, RainbowMethod};
162//!
163//! # let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
164//! # let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
165//! # let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
166//! // Rainbow test for linearity
167//! let rainbow = rainbow_test(&y, &[x1.clone(), x2.clone()], 0.5, RainbowMethod::R)?;
168//! if rainbow.r_result.as_ref().map_or(false, |r| r.p_value < 0.05) {
169//! println!("Warning: relationship may be non-linear");
170//! }
171//!
172//! // Breusch-Pagan test for heteroscedasticity
173//! let bp = breusch_pagan_test(&y, &[x1, x2])?;
174//! if bp.p_value < 0.05 {
175//! println!("Warning: residuals have non-constant variance");
176//! }
177//! # Ok::<(), linreg_core::Error>(())
178//! ```
179//!
180//! ## Feature Flags
181//!
182//! | Flag | Default | Description |
183//! |------|---------|-------------|
184//! | `wasm` | Yes | Enables WASM bindings and browser support |
185//! | `python` | No | Enables Python bindings via PyO3 (built with maturin) |
186//! | `validation` | No | Includes test data for validation tests |
187//!
188//! For native-only builds (smaller binary, no WASM deps):
189//!
190//! ```toml
191//! linreg-core = { version = "0.6", default-features = false }
192//! ```
193//!
194//! ## Why This Library?
195//!
196//! - **No external math dependencies** — No `nalgebra`, no `statrs`, no `ndarray`. Linear algebra and statistics are implemented in pure Rust.
197//! - **Validated** — Outputs match R's `lm()`, `glmnet`, and Python's `statsmodels`
198//! - **WASM-ready** — Same code runs natively and in browsers
199//! - **Python-ready** — PyO3 bindings expose the full API to Python
200//! - **Permissive license** — MIT OR Apache-2.0
201//!
202//! ## Module Structure
203//!
204//! - [`core`] — OLS regression, coefficients, residuals, VIF, AIC/BIC
205//! - [`regularized`] — Ridge, Lasso, Elastic Net, regularization paths
206//! - [`weighted_regression`] — Weighted Least Squares (WLS)
207//! - [`loess`] — Locally weighted scatterplot smoothing
208//! - [`cross_validation`] — K-Fold Cross Validation for all regression types
209//! - [`mod@prediction_intervals`] — Prediction and confidence intervals for all model types
210//! - [`diagnostics`] — 14 statistical tests (linearity, heteroscedasticity, normality, autocorrelation, influence)
211//! - [`serialization`] — Model save/load to JSON (native Rust)
212//! - [`stats`] — Descriptive statistics utilities
213//! - [`distributions`] — Statistical distributions (t, F, χ², normal, beta, gamma)
214//! - [`linalg`] — Matrix operations, QR decomposition, linear system solver
215//! - [`error`] — Error types and Result alias
216//!
217//! ## Links
218//!
219//! - [Repository](https://github.com/jesse-anderson/linreg-core)
220//! - [Documentation](https://docs.rs/linreg-core)
221//! - [Examples](https://github.com/jesse-anderson/linreg-core/tree/main/examples)
222//!
223//! ## Disclaimer
224//!
225//! This library is under active development and has not reached 1.0 stability.
226//! While outputs are validated against R and Python implementations, **do not
227//! use this library for critical applications** (medical, financial, safety-critical
228//! systems) without independent verification. See the LICENSE for full terms.
229//! The software is provided "as is" without warranty of any kind.
230
231// Declare core modules (always available)
232pub mod core;
233pub mod cross_validation;
234pub mod diagnostics;
235pub mod distributions;
236pub mod error;
237pub mod linalg;
238pub mod loess;
239pub mod prediction_intervals;
240pub mod regularized;
241pub mod serialization;
242pub mod stats;
243pub mod weighted_regression;
244
245// Python bindings (only compiled when "python" feature is enabled)
246// Module structure: src/python/ with mod.rs, error.rs, types.rs, results.rs
247#[cfg(feature = "python")]
248pub mod python;
249
250// WASM bindings (only compiled when "wasm" feature is enabled)
251// Module structure: src/wasm.rs - contains all wasm-bindgen exports
252#[cfg(feature = "wasm")]
253pub mod wasm;
254
255// Unit tests now live under the tests/ directory:
256// - error_tests.rs -> tests/unit/error_tests.rs
257// - core_tests.rs -> tests/unit/core_tests.rs
258// - linalg_tests.rs -> tests/unit/linalg_tests.rs
259// - validation_tests.rs -> tests/validation/main.rs
260// - diagnostics_tests.rs: disabled (references unimplemented functions)
261
262// Re-export public API (always available)
263pub use core::{aic, aic_python, bic, bic_python, log_likelihood, RegressionOutput, VifResult};
264pub use prediction_intervals::{
265 compute_from_fit, elastic_net_prediction_intervals, lasso_prediction_intervals,
266 prediction_intervals, ridge_prediction_intervals, PredictionIntervalOutput,
267};
268pub use diagnostics::{
269 BGTestType, BreuschGodfreyResult, CooksDistanceResult, DiagnosticTestResult,
270 RainbowMethod, RainbowSingleResult, RainbowTestOutput, ResetType,
271 WhiteMethod, WhiteSingleResult, WhiteTestOutput,
272};
273pub use cross_validation::{
274 CVResult, FoldResult, KFoldOptions, kfold_cv_elastic_net, kfold_cv_lasso, kfold_cv_ols,
275 kfold_cv_ridge,
276};
277pub use loess::{loess_fit, LoessFit, LoessOptions};
278pub use weighted_regression::{wls_regression, WlsFit};
279
280// Re-export the core `rainbow_test` and `white_test` functions under `_core` aliases to avoid WASM conflicts
281pub use diagnostics::rainbow_test as rainbow_test_core;
282pub use diagnostics::white_test as white_test_core;
283
284pub use error::{error_json, error_to_json, Error, Result};