// linreg_core — src/lib.rs
//! # linreg-core
//!
//! A lightweight, self-contained linear regression library in pure Rust.
//!
//! **No external math dependencies.** All linear algebra (matrices, QR decomposition)
//! and statistical functions (distributions, hypothesis tests) are implemented from
//! scratch. Compiles to WebAssembly for browser use, exposes Python bindings via PyO3,
//! or runs as a native Rust crate.
//!
//! **[Live Demo →](https://jesse-anderson.net/linreg-core/)**
//!
//! ## What This Does
//!
//! - **OLS Regression** — Ordinary Least Squares with numerically stable QR decomposition
//! - **Regularized Regression** — Ridge, Lasso, and Elastic Net via coordinate descent
//! - **WLS Regression** — Weighted Least Squares for heteroscedastic data
//! - **LOESS** — Non-parametric locally weighted smoothing
//! - **K-Fold Cross Validation** — Model evaluation for all regression types
//! - **Prediction Intervals** — Point and interval predictions for all model types
//! - **Diagnostic Tests** — 14 statistical tests for validating regression assumptions
//! - **Feature Importance** — Standardized coefficients, SHAP, permutation importance, VIF ranking
//! - **Model Serialization** — Save/load trained models to JSON
//! - **WASM Support** — Same API works in browsers via WebAssembly
//! - **Python Bindings** — PyO3 bindings available via `pip install linreg-core`
//!
//! ## Quick Start
//!
//! ### Native Rust
//!
//! Add to `Cargo.toml` (no WASM overhead):
//!
//! ```toml
//! [dependencies]
//! linreg-core = { version = "0.8", default-features = false }
//! ```
//!
//! ```rust
//! use linreg_core::core::ols_regression;
//!
//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
//! let names = vec!["Intercept".into(), "Temp".into(), "Pressure".into()];
//!
//! let result = ols_regression(&y, &[x1, x2], &names)?;
//! println!("R²: {}", result.r_squared);
//! println!("F-statistic: {}", result.f_statistic);
//! println!("AIC: {}", result.aic);
//! # Ok::<(), linreg_core::Error>(())
//! ```
//!
//! ### WebAssembly (JavaScript)
//!
//! ```toml
//! [dependencies]
//! linreg-core = "0.8"
//! ```
//!
//! Build with `wasm-pack build --target web`, then use in JavaScript:
//!
//! ```text
//! import init, { ols_regression } from './linreg_core.js';
//! await init();
//!
//! const result = JSON.parse(ols_regression(
//!     JSON.stringify([2.5, 3.7, 4.2, 5.1, 6.3]),
//!     JSON.stringify([[1,2,3,4,5], [2,4,5,4,3]]),
//!     JSON.stringify(["Intercept", "X1", "X2"])
//! ));
//! console.log("R²:", result.r_squared);
//! ```
//!
//! ## Regularized Regression
//!
//! ```no_run
//! use linreg_core::regularized::{ridge_fit, RidgeFitOptions, lasso_fit, LassoFitOptions};
//! use linreg_core::linalg::Matrix;
//!
//! let x = Matrix::new(100, 3, vec![0.0; 300]);
//! let y = vec![0.0; 100];
//!
//! // Ridge regression (L2 penalty - shrinks coefficients, handles multicollinearity)
//! let ridge_result = ridge_fit(&x, &y, &RidgeFitOptions {
//!     lambda: 1.0,
//!     intercept: true,
//!     standardize: true,
//!     ..Default::default()
//! })?;
//!
//! // Lasso regression (L1 penalty — automatic variable selection by zeroing coefficients)
//! let lasso_result = lasso_fit(&x, &y, &LassoFitOptions {
//!     lambda: 0.1,
//!     intercept: true,
//!     standardize: true,
//!     ..Default::default()
//! })?;
//! # Ok::<(), linreg_core::Error>(())
//! ```
//!
//! ## WLS and LOESS
//!
//! ```no_run
//! use linreg_core::weighted_regression::wls_regression;
//! use linreg_core::loess::{loess_fit, LoessOptions};
//!
//! // Weighted Least Squares — down-weight high-variance observations
//! let weights = vec![1.0, 2.0, 1.0, 2.0, 1.0];
//! let wls = wls_regression(
//!     &[2.5, 3.7, 4.2, 5.1, 6.3],
//!     &[vec![1.0, 2.0, 3.0, 4.0, 5.0]],
//!     &weights,
//! )?;
//!
//! // LOESS — non-parametric smoothing (single predictor)
//! let x = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
//! let y = vec![1.0, 2.1, 3.9, 8.2, 16.5, 32.1];
//! let loess = loess_fit(&y, &[x], &LoessOptions::default())?;
//! # Ok::<(), linreg_core::Error>(())
//! ```
//!
//! ## K-Fold Cross Validation
//!
//! ```no_run
//! use linreg_core::cross_validation::{kfold_cv_ols, KFoldOptions};
//!
//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3, 7.0, 7.5, 8.1];
//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
//! let names = vec!["Intercept".to_string(), "X1".to_string()];
//!
//! let cv = kfold_cv_ols(&y, &[x1], &names, &KFoldOptions {
//!     n_folds: 5,
//!     shuffle: true,
//!     seed: Some(42),
//! })?;
//! println!("CV RMSE: {:.4} ± {:.4}", cv.mean_rmse, cv.std_rmse);
//! println!("CV R²:   {:.4} ± {:.4}", cv.mean_r_squared, cv.std_r_squared);
//! # Ok::<(), linreg_core::Error>(())
//! ```
//!
//! ## Diagnostic Tests
//!
//! After fitting a model, validate its assumptions:
//!
//! | Test | Tests For | Use When |
//! |------|-----------|----------|
//! | [`diagnostics::rainbow_test`] | Linearity | Checking if relationships are linear |
//! | [`diagnostics::harvey_collier_test`] | Functional form | Suspecting model misspecification |
//! | [`diagnostics::reset_test`] | Specification error | Detecting omitted variables or wrong functional form |
//! | [`diagnostics::breusch_pagan_test`] | Heteroscedasticity | Variance changes with predictors |
//! | [`diagnostics::white_test`] | Heteroscedasticity | More general than Breusch-Pagan |
//! | [`diagnostics::shapiro_wilk_test`] | Normality | Small to moderate samples (n ≤ 5000) |
//! | [`diagnostics::jarque_bera_test`] | Normality | Large samples, skewness/kurtosis |
//! | [`diagnostics::anderson_darling_test`] | Normality | Tail-sensitive, any sample size |
//! | [`diagnostics::durbin_watson_test`] | Autocorrelation | Time series or ordered data |
//! | [`diagnostics::breusch_godfrey_test`] | Higher-order autocorrelation | Detecting serial correlation at multiple lags |
//! | [`diagnostics::cooks_distance_test`] | Influential points | Identifying high-impact observations |
//! | [`diagnostics::dfbetas_test`] | Coefficient influence | Which observations drive each coefficient |
//! | [`diagnostics::dffits_test`] | Fitted value influence | Influence of each observation on its own prediction |
//! | [`diagnostics::vif_test`] | Multicollinearity | Detecting highly correlated predictors |
//!
//! ```rust
//! use linreg_core::diagnostics::{rainbow_test, breusch_pagan_test, RainbowMethod};
//!
//! # let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
//! # let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
//! # let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
//! // Rainbow test for linearity
//! let rainbow = rainbow_test(&y, &[x1.clone(), x2.clone()], 0.5, RainbowMethod::R)?;
//! if rainbow.r_result.as_ref().map_or(false, |r| r.p_value < 0.05) {
//!     println!("Warning: relationship may be non-linear");
//! }
//!
//! // Breusch-Pagan test for heteroscedasticity
//! let bp = breusch_pagan_test(&y, &[x1, x2])?;
//! if bp.p_value < 0.05 {
//!     println!("Warning: residuals have non-constant variance");
//! }
//! # Ok::<(), linreg_core::Error>(())
//! ```
//!
//! ## Feature Flags
//!
//! | Flag | Default | Description |
//! |------|---------|-------------|
//! | `wasm` | Yes | Enables WASM bindings and browser support |
//! | `python` | No | Enables Python bindings via PyO3 (built with maturin) |
//! | `validation` | No | Includes test data for validation tests |
//!
//! For native-only builds (smaller binary, no WASM deps):
//!
//! ```toml
//! linreg-core = { version = "0.8", default-features = false }
//! ```
//!
//! ## Why This Library?
//!
//! - **Zero dependencies** — No `nalgebra`, no `statrs`, no `ndarray`. Pure Rust.
//! - **Validated** — Outputs match R's `lm()`, `glmnet`, and Python's `statsmodels`
//! - **WASM-ready** — Same code runs natively and in browsers
//! - **Python-ready** — PyO3 bindings expose the full API to Python
//! - **Permissive license** — MIT OR Apache-2.0
//!
//! ## Module Structure
//!
//! - [`core`] — OLS regression, coefficients, residuals, VIF, AIC/BIC
//! - [`regularized`] — Ridge, Lasso, Elastic Net, regularization paths
//! - [`polynomial`] — Polynomial regression of any degree with centering/standardization
//! - [`weighted_regression`] — Weighted Least Squares (WLS)
//! - [`loess`] — Locally weighted scatterplot smoothing
//! - [`cross_validation`] — K-Fold Cross Validation for all regression types
//! - [`mod@prediction_intervals`] — Prediction and confidence intervals for all model types
//! - [`feature_importance`] — Standardized coefficients, SHAP, permutation importance, VIF ranking
//! - [`diagnostics`] — 14 statistical tests (linearity, heteroscedasticity, normality, autocorrelation, influence)
//! - [`serialization`] — Model save/load to JSON (native Rust)
//! - [`stats`] — Descriptive statistics utilities
//! - [`distributions`] — Statistical distributions (t, F, χ², normal, beta, gamma)
//! - [`linalg`] — Matrix operations, QR decomposition, linear system solver
//! - [`error`] — Error types and Result alias
//!
//! ## Links
//!
//! - [Repository](https://github.com/jesse-anderson/linreg-core)
//! - [Documentation](https://docs.rs/linreg-core)
//! - [Examples](https://github.com/jesse-anderson/linreg-core/tree/main/examples)
//!
//! ## Disclaimer
//!
//! This library is under active development and has not reached 1.0 stability.
//! While outputs are validated against R and Python implementations, **do not
//! use this library for critical applications** (medical, financial, safety-critical
//! systems) without independent verification. See the LICENSE for full terms.
//! The software is provided "as is" without warranty of any kind.

234// Import core modules (always available)
235pub mod core;
236pub mod cross_validation;
237pub mod diagnostics;
238pub mod distributions;
239pub mod error;
240pub mod feature_importance;
241pub mod linalg;
242pub mod loess;
243pub mod polynomial;
244pub mod prediction_intervals;
245pub mod regularized;
246pub mod serialization;
247pub mod stats;
248pub mod weighted_regression;
249
// Python bindings (only compiled when the "python" feature is enabled).
// Module structure: src/python/ with mod.rs, error.rs, types.rs, results.rs.
#[cfg(feature = "python")]
pub mod python;

// WASM bindings (only compiled when the "wasm" feature is enabled).
// Module structure: src/wasm.rs — contains all wasm-bindgen exports.
#[cfg(feature = "wasm")]
pub mod wasm;

// Windows DLL / FFI bindings (only compiled when the "ffi" feature is enabled).
// Provides a handle-based stdcall API for VBA/Excel use.
#[cfg(feature = "ffi")]
pub mod ffi;

// XLL add-in bindings (only compiled when the "xll" feature is enabled).
// Exposes linreg-core functions as Excel worksheet UDFs via xladd.
#[cfg(feature = "xll")]
pub mod xll;
269
270// Unit tests are now in tests/unit/ directory
271// - error_tests.rs -> tests/unit/error_tests.rs
272// - core_tests.rs -> tests/unit/core_tests.rs
273// - linalg_tests.rs -> tests/unit/linalg_tests.rs
274// - validation_tests.rs -> tests/validation/main.rs
275// - diagnostics_tests.rs: disabled (references unimplemented functions)
276
277// Re-export public API (always available)
278pub use core::{aic, aic_python, bic, bic_python, log_likelihood, RegressionOutput, VifResult};
279pub use prediction_intervals::{
280    compute_from_fit, elastic_net_prediction_intervals, lasso_prediction_intervals,
281    prediction_intervals, ridge_prediction_intervals, PredictionIntervalOutput,
282};
283pub use diagnostics::{
284    BGTestType, BreuschGodfreyResult, CooksDistanceResult, DiagnosticTestResult,
285    RainbowMethod, RainbowSingleResult, RainbowTestOutput, ResetType,
286    WhiteMethod, WhiteSingleResult, WhiteTestOutput,
287};
288pub use cross_validation::{
289    CVResult, FoldResult, KFoldOptions, kfold_cv_elastic_net, kfold_cv_lasso, kfold_cv_ols,
290    kfold_cv_ridge,
291};
292pub use loess::{loess_fit, LoessFit, LoessOptions};
293pub use polynomial::{polynomial_regression, predict as polynomial_predict, PolynomialFit, PolynomialOptions};
294pub use weighted_regression::{wls_regression, WlsFit};
295pub use feature_importance::{
296    PermutationImportanceOptions, PermutationImportanceOutput, ShapOutput,
297    StandardizedCoefficientsOutput, VifRankingOutput, permutation_importance_elastic_net,
298    permutation_importance_lasso, permutation_importance_loess, permutation_importance_ols,
299    permutation_importance_ols_named, permutation_importance_ridge, shap_values_elastic_net,
300    shap_values_lasso, shap_values_linear, shap_values_linear_named,
301    shap_values_polynomial, shap_values_ridge, standardized_coefficients,
302    standardized_coefficients_named, vif_ranking,
303};
304
305// Re-export core test functions with different names to avoid WASM conflicts
306pub use diagnostics::rainbow_test as rainbow_test_core;
307pub use diagnostics::white_test as white_test_core;
308
309pub use error::{error_json, error_to_json, Error, Result};
310pub use stats::{correlation, max, mean, median, min, mode, quantile, range, stddev, sum, variance, FiveNumberSummary, ModeResult};