linreg_core/lib.rs
1//! # linreg-core
2//!
3//! A lightweight, self-contained linear regression library in pure Rust.
4//!
5//! **No external math dependencies.** All linear algebra (matrices, QR decomposition)
6//! and statistical functions (distributions, hypothesis tests) are implemented from
7//! scratch. Compiles to WebAssembly for browser use or runs as a native Rust crate.
8//!
9//! ## What This Does
10//!
11//! - **OLS Regression** — Ordinary Least Squares with numerically stable QR decomposition
12//! - **Regularized Regression** — Ridge, Lasso, and Elastic Net via coordinate descent
13//! - **Diagnostic Tests** — 8+ statistical tests for validating regression assumptions
14//! - **WASM Support** — Same API works in browsers via WebAssembly
15//!
16//! ## Quick Start
17//!
18//! ### Native Rust
19//!
20//! Add to `Cargo.toml` (no WASM overhead):
21//!
22//! ```toml
23//! [dependencies]
24//! linreg-core = { version = "0.3", default-features = false }
25//! ```
26//!
27//! ```rust
28//! use linreg_core::core::ols_regression;
29//!
30//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
31//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
32//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
33//! let names = vec!["Intercept".into(), "Temp".into(), "Pressure".into()];
34//!
35//! let result = ols_regression(&y, &[x1, x2], &names)?;
36//! println!("R²: {}", result.r_squared);
37//! println!("F-statistic: {}", result.f_statistic);
38//! # Ok::<(), linreg_core::Error>(())
39//! ```
40//!
41//! ### WebAssembly (JavaScript)
42//!
43//! ```toml
44//! [dependencies]
45//! linreg-core = "0.3"
46//! ```
47//!
48//! Build with `wasm-pack build --target web`, then use in JavaScript:
49//!
50//! ```text
51//! import init, { ols_regression } from './linreg_core.js';
52//! await init();
53//!
54//! const result = JSON.parse(ols_regression(
55//! JSON.stringify([2.5, 3.7, 4.2, 5.1, 6.3]),
56//! JSON.stringify([[1,2,3,4,5], [2,4,5,4,3]]),
57//! JSON.stringify(["Intercept", "X1", "X2"])
58//! ));
59//! console.log("R²:", result.r_squared);
60//! ```
61//!
62//! ## Regularized Regression
63//!
64//! ```no_run
65//! use linreg_core::regularized::{ridge, lasso};
66//! use linreg_core::linalg::Matrix;
67//!
68//! let x = Matrix::new(100, 3, vec![0.0; 300]);
69//! let y = vec![0.0; 100];
70//!
71//! // Ridge regression (L2 penalty - shrinks coefficients, handles multicollinearity)
72//! let ridge_result = ridge::ridge_fit(&x, &y, &ridge::RidgeFitOptions {
73//! lambda: 1.0,
74//! intercept: true,
75//! standardize: true,
76//! ..Default::default()
77//! })?;
78//!
79//! // Lasso regression (L1 penalty - does variable selection by zeroing coefficients)
80//! let lasso_result = lasso::lasso_fit(&x, &y, &lasso::LassoFitOptions {
81//! lambda: 0.1,
82//! intercept: true,
83//! standardize: true,
84//! ..Default::default()
85//! })?;
86//! # Ok::<(), linreg_core::Error>(())
87//! ```
88//!
89//! ## Diagnostic Tests
90//!
91//! After fitting a model, validate its assumptions:
92//!
93//! | Test | Tests For | Use When |
94//! |------|-----------|----------|
95//! | [`rainbow_test`] | Linearity | Checking if relationships are linear |
96//! | [`harvey_collier_test`] | Functional form | Suspecting model misspecification |
97//! | [`breusch_pagan_test`] | Heteroscedasticity | Variance changes with predictors |
98//! | [`white_test`] | Heteroscedasticity | More general than Breusch-Pagan |
99//! | [`shapiro_wilk_test`] | Normality | Small to moderate samples (n ≤ 5000) |
100//! | [`jarque_bera_test`] | Normality | Large samples, skewness/kurtosis |
101//! | [`anderson_darling_test`] | Normality | Tail-sensitive, any sample size |
102//! | [`durbin_watson_test`] | Autocorrelation | Time series or ordered data |
103//! | [`cooks_distance_test`] | Influential points | Identifying high-impact observations |
104//!
105//! ```rust
106//! use linreg_core::diagnostics::{rainbow_test, breusch_pagan_test, RainbowMethod};
107//!
108//! # let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
109//! # let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
110//! # let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
111//! // Rainbow test for linearity
112//! let rainbow = rainbow_test(&y, &[x1.clone(), x2.clone()], 0.5, RainbowMethod::R)?;
113//! if rainbow.r_result.as_ref().map_or(false, |r| r.p_value < 0.05) {
114//! println!("Warning: relationship may be non-linear");
115//! }
116//!
117//! // Breusch-Pagan test for heteroscedasticity
118//! let bp = breusch_pagan_test(&y, &[x1, x2])?;
119//! if bp.p_value < 0.05 {
120//! println!("Warning: residuals have non-constant variance");
121//! }
122//! # Ok::<(), linreg_core::Error>(())
123//! ```
124//!
125//! ## Feature Flags
126//!
127//! | Flag | Default | Description |
128//! |------|---------|-------------|
129//! | `wasm` | Yes | Enables WASM bindings and browser support |
130//! | `validation` | No | Includes test data for validation tests |
131//!
132//! For native-only builds (smaller binary, no WASM deps):
133//!
134//! ```toml
135//! linreg-core = { version = "0.3", default-features = false }
136//! ```
137//!
138//! ## Why This Library?
139//!
//! - **No math dependencies** — No `nalgebra`, no `statrs`, no `ndarray`. All numerics are pure Rust.
141//! - **Validated** — Outputs match R's `lm()` and Python's `statsmodels`
142//! - **WASM-ready** — Same code runs natively and in browsers
143//! - **Small** — Core is ~2000 lines, compiles quickly
144//! - **Permissive license** — MIT OR Apache-2.0
145//!
146//! ## Module Structure
147//!
148//! - [`core`] — OLS regression, coefficients, residuals, VIF
149//! - [`regularized`] — Ridge, Lasso, Elastic Net, regularization paths
150//! - [`diagnostics`] — All diagnostic tests (linearity, heteroscedasticity, normality, autocorrelation)
151//! - [`distributions`] — Statistical distributions (t, F, χ², normal, beta, gamma)
152//! - [`linalg`] — Matrix operations, QR decomposition, linear system solver
153//! - [`error`] — Error types and Result alias
154//!
155//! ## Links
156//!
157//! - [Repository](https://github.com/jesse-anderson/linreg-core)
158//! - [Documentation](https://docs.rs/linreg-core)
159//! - [Examples](https://github.com/jesse-anderson/linreg-core/tree/main/examples)
160//!
161//! ## Disclaimer
162//!
163//! This library is under active development and has not reached 1.0 stability.
164//! While outputs are validated against R and Python implementations, **do not
165//! use this library for critical applications** (medical, financial, safety-critical
166//! systems) without independent verification. See the LICENSE for full terms.
167//! The software is provided "as is" without warranty of any kind.
168
// Declare core modules (always compiled, independent of feature flags)
170pub mod core;
171pub mod diagnostics;
172pub mod distributions;
173pub mod error;
174pub mod linalg;
175pub mod regularized;
176pub mod stats;
177
178// Python bindings (only compiled when "python" feature is enabled)
179// Module structure: src/python/ with mod.rs, error.rs, types.rs, results.rs
180#[cfg(feature = "python")]
181pub mod python;
182
183// Unit tests are now in tests/unit/ directory
184// - error_tests.rs -> tests/unit/error_tests.rs
185// - core_tests.rs -> tests/unit/core_tests.rs
186// - linalg_tests.rs -> tests/unit/linalg_tests.rs
187// - validation_tests.rs -> tests/validation/main.rs
188// - diagnostics_tests.rs: disabled (references unimplemented functions)
189
190// Re-export public API (always available)
191pub use core::{aic, aic_python, bic, bic_python, log_likelihood, RegressionOutput, VifResult};
192pub use diagnostics::{
193 BGTestType, BreuschGodfreyResult, CooksDistanceResult, DiagnosticTestResult,
194 RainbowMethod, RainbowSingleResult, RainbowTestOutput, ResetType,
195 WhiteMethod, WhiteSingleResult, WhiteTestOutput,
196};
197
198// Re-export core test functions with different names to avoid WASM conflicts
199pub use diagnostics::rainbow_test as rainbow_test_core;
200pub use diagnostics::white_test as white_test_core;
201
202pub use error::{error_json, error_to_json, Error, Result};
203
204// ============================================================================
205// WASM-specific code (only compiled when "wasm" feature is enabled)
206// ============================================================================
207
208#[cfg(feature = "wasm")]
209use wasm_bindgen::prelude::*;
210
211#[cfg(feature = "wasm")]
212use std::collections::HashSet;
213
214#[cfg(feature = "wasm")]
215use serde::Serialize;
216
217#[cfg(feature = "wasm")]
218use crate::distributions::{normal_inverse_cdf, student_t_cdf};
219
220// ============================================================================
221// CSV Parsing (WASM-only)
222// ============================================================================
223
/// Serializable result of `parse_csv`, emitted to JavaScript as a JSON object.
///
/// Field order here is the key order of the serialized JSON, so do not
/// reorder fields without checking consumers.
#[cfg(feature = "wasm")]
#[derive(Serialize)]
struct ParsedCsv {
    /// Column names taken from the CSV header row, in file order.
    headers: Vec<String>,
    /// One JSON object per accepted record, keyed by header name. Cells that
    /// parse as a finite `f64` are stored as JSON numbers; all other cells are
    /// stored as trimmed strings.
    data: Vec<serde_json::Map<String, serde_json::Value>>,
    /// Sorted names of the columns in which at least one cell parsed as a
    /// finite number (a column with mixed content is still listed).
    numeric_columns: Vec<String>,
}
231
/// Parses CSV text and returns its contents as a JSON string.
///
/// The first record is treated as the header row. Every following record
/// becomes one JSON object keyed by header name. Each cell is trimmed; a cell
/// that parses as a finite `f64` is emitted as a JSON number, anything else
/// (including `NaN`/`inf` spellings and empty cells) is emitted as a string.
///
/// # Arguments
///
/// * `content` - CSV content as a string
///
/// # Returns
///
/// JSON string with structure:
/// ```json
/// {
///   "headers": ["col1", "col2", ...],
///   "data": [{"col1": 1.0, "col2": "text"}, ...],
///   "numeric_columns": ["col1", ...]
/// }
/// ```
///
/// `numeric_columns` is sorted alphabetically and lists every column in which
/// **at least one** cell was numeric, so a column with mixed content is still
/// reported. Callers that need strictly numeric columns must check the values.
///
/// Records whose field count differs from the header row are skipped silently;
/// they do not appear in `data` and do not cause an error.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, the header row
/// cannot be read, a record cannot be parsed, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn parse_csv(content: &str) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    // `flexible(true)` lets ragged rows through the reader so that they can be
    // skipped below instead of aborting the whole parse.
    let mut reader = csv::ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .from_reader(content.as_bytes());

    let headers: Vec<String> = match reader.headers() {
        Ok(h) => h.iter().map(|s| s.to_string()).collect(),
        Err(e) => return error_json(&format!("Failed to read headers: {}", e)),
    };

    let mut data = Vec::new();
    // Borrow the header names instead of cloning a `String` per numeric cell.
    let mut numeric_col_set: HashSet<&str> = HashSet::new();

    for result in reader.records() {
        let record = match result {
            Ok(r) => r,
            Err(e) => return error_json(&format!("Failed to parse CSV record: {}", e)),
        };

        // Ragged row: cannot be aligned with the headers, so drop it.
        if record.len() != headers.len() {
            continue;
        }

        let mut row_map = serde_json::Map::new();

        // Lengths are equal here, so zipping pairs every field with its header.
        for (header, field) in headers.iter().zip(record.iter()) {
            let field = field.trim();

            // `Number::from_f64` yields `None` for NaN/infinity, which makes
            // those spellings fall back to the string branch.
            let value = match field
                .parse::<f64>()
                .ok()
                .and_then(serde_json::Number::from_f64)
            {
                Some(number) => {
                    numeric_col_set.insert(header.as_str());
                    serde_json::Value::Number(number)
                }
                None => serde_json::Value::String(field.to_string()),
            };

            row_map.insert(header.clone(), value);
        }
        data.push(row_map);
    }

    // Set iteration order is unspecified; sort for a deterministic output.
    let mut numeric_columns: Vec<String> =
        numeric_col_set.into_iter().map(str::to_owned).collect();
    numeric_columns.sort_unstable();

    let output = ParsedCsv {
        headers,
        data,
        numeric_columns,
    };

    serde_json::to_string(&output).unwrap_or_else(|_| error_json("Failed to serialize CSV output"))
}
328
329// ============================================================================
330// OLS Regression WASM Wrapper
331// ============================================================================
332
/// WASM entry point for ordinary least squares regression.
///
/// Inputs and output are JSON-encoded strings so the function can be called
/// directly from JavaScript. The decoded data is handed to
/// `core::ols_regression` and its output is serialized back to JSON.
///
/// # Arguments
///
/// * `y_json` - JSON array of response values, e.g. `[1.0, 2.0, 3.0]`
/// * `x_vars_json` - JSON array of predictor arrays, e.g. `[[1.0, 2.0], [0.5, 1.0]]`
/// * `variable_names` - JSON array of names, e.g. `["Intercept", "X1", "X2"]`.
///   If this string is not a valid JSON array of strings, the names silently
///   fall back to `["Intercept"]` rather than producing an error.
///
/// # Returns
///
/// JSON serialization of the regression output returned by
/// `core::ols_regression`.
///
/// # Errors
///
/// Returns a JSON error object if:
/// - the domain check fails
/// - `y_json` or `x_vars_json` cannot be parsed
/// - `core::ols_regression` reports an error
/// - the output cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn ols_regression(y_json: &str, x_vars_json: &str, variable_names: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    // Decode the numeric inputs; a malformed payload is reported to the caller.
    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    // Names are optional in practice: unparsable input degrades to a default.
    let labels = serde_json::from_str::<Vec<String>>(variable_names)
        .unwrap_or_else(|_| vec!["Intercept".to_string()]);

    core::ols_regression(&response, &predictors, &labels).map_or_else(
        |fit_err| error_json(&fit_err.to_string()),
        |fit| {
            serde_json::to_string(&fit).unwrap_or_else(|_| error_json("Failed to serialize output"))
        },
    )
}
387
388// ============================================================================
389// Diagnostic Tests WASM Wrappers
390// ============================================================================
391
/// WASM entry point for the Rainbow test of linearity.
///
/// The Rainbow test assesses whether the relationship between the predictors
/// and the response is linear; a significant p-value points to non-linearity.
/// The decoded inputs are forwarded to `diagnostics::rainbow_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `fraction` - Fraction of data used for the central subset (0.0 to 1.0, typically 0.5)
/// * `method` - `"python"` or `"both"` (case-insensitive); any other value,
///   including `"r"`, selects the R method
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::rainbow_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn rainbow_test(y_json: &str, x_vars_json: &str, fraction: f64, method: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    // Unrecognised method names deliberately fall through to the R variant.
    let requested = method.to_lowercase();
    let variant = if requested == "python" {
        diagnostics::RainbowMethod::Python
    } else if requested == "both" {
        diagnostics::RainbowMethod::Both
    } else {
        diagnostics::RainbowMethod::R
    };

    diagnostics::rainbow_test(&response, &predictors, fraction, variant).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Rainbow test result"))
        },
    )
}
441
/// WASM entry point for the Harvey-Collier test of linearity.
///
/// The Harvey-Collier test looks for a linear trend in the residuals, which
/// would indicate a misspecified functional form; a significant p-value
/// suggests non-linearity. This wrapper always calls
/// `diagnostics::harvey_collier_test` with `HarveyCollierMethod::R`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::harvey_collier_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn harvey_collier_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::harvey_collier_test(&response, &predictors, diagnostics::HarveyCollierMethod::R)
        .map_or_else(
            |test_err| error_json(&test_err.to_string()),
            |report| {
                serde_json::to_string(&report).unwrap_or_else(|_| {
                    error_json("Failed to serialize Harvey-Collier test result")
                })
            },
        )
}
483
/// WASM entry point for the Breusch-Pagan test of heteroscedasticity.
///
/// The Breusch-Pagan test checks the homoscedasticity assumption, i.e. that
/// residual variance is constant across the range of predicted values. A
/// significant p-value suggests heteroscedasticity. The decoded inputs are
/// forwarded to `diagnostics::breusch_pagan_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::breusch_pagan_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn breusch_pagan_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::breusch_pagan_test(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Breusch-Pagan test result"))
        },
    )
}
525
/// WASM entry point for the White test of heteroscedasticity.
///
/// The White test is a general heteroscedasticity test that does not assume a
/// particular form for the non-constant variance. A significant p-value
/// suggests the error variance is not constant. The decoded inputs are
/// forwarded to `diagnostics::white_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `method` - `"python"` or `"both"` (case-insensitive); any other value,
///   including `"r"`, selects the R method
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::white_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn white_test(y_json: &str, x_vars_json: &str, method: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    // Unrecognised method names deliberately fall through to the R variant.
    let requested = method.to_lowercase();
    let variant = if requested == "python" {
        diagnostics::WhiteMethod::Python
    } else if requested == "both" {
        diagnostics::WhiteMethod::Both
    } else {
        diagnostics::WhiteMethod::R
    };

    diagnostics::white_test(&response, &predictors, variant).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize White test result"))
        },
    )
}
575
/// WASM entry point for the R-flavoured White test of heteroscedasticity.
///
/// Delegates to `diagnostics::r_white_method`, which is intended to match R's
/// `skedastic::white()`: standard QR decomposition with an auxiliary matrix of
/// intercept, X and X² terms only (no cross-products).
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::r_white_method`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn r_white_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::r_white_method(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize R White test result"))
        },
    )
}
617
/// WASM entry point for the Python-flavoured White test of heteroscedasticity.
///
/// Delegates to `diagnostics::python_white_method`, which is intended to match
/// Python's `statsmodels.stats.diagnostic.het_white()`: LINPACK QR
/// decomposition with column pivoting and an auxiliary matrix of intercept, X,
/// X² and cross-product terms.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::python_white_method`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn python_white_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::python_white_method(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Python White test result"))
        },
    )
}
659
/// WASM entry point for the Jarque-Bera test of normality.
///
/// The Jarque-Bera test judges normality of the residuals from their skewness
/// and kurtosis; a significant p-value suggests a departure from normality.
/// The decoded inputs are forwarded to `diagnostics::jarque_bera_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::jarque_bera_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn jarque_bera_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::jarque_bera_test(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Jarque-Bera test result"))
        },
    )
}
701
702// ============================================================================
703// Durbin-Watson Test (WASM wrapper)
704// ============================================================================
705
/// WASM entry point for the Durbin-Watson test of autocorrelation.
///
/// The Durbin-Watson statistic measures autocorrelation in the residuals:
/// values near 2 indicate none, values near 0 suggest positive
/// autocorrelation and values near 4 suggest negative autocorrelation.
/// The decoded inputs are forwarded to `diagnostics::durbin_watson_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::durbin_watson_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn durbin_watson_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::durbin_watson_test(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Durbin-Watson test result"))
        },
    )
}
747
748// ============================================================================
749// Shapiro-Wilk Test (WASM wrapper)
750// ============================================================================
751
/// WASM entry point for the Shapiro-Wilk test of normality.
///
/// The Shapiro-Wilk test is a powerful normality test for small to moderate
/// sample sizes (3 ≤ n ≤ 5000). Its null hypothesis is that the residuals are
/// normally distributed. The decoded inputs are forwarded to
/// `diagnostics::shapiro_wilk_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::shapiro_wilk_test`
/// (the W statistic ranges from 0 to 1).
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn shapiro_wilk_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::shapiro_wilk_test(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Shapiro-Wilk test result"))
        },
    )
}
794
/// WASM entry point for the Anderson-Darling test of normality.
///
/// The Anderson-Darling test compares the empirical distribution of the
/// residuals with the expected normal distribution and is particularly
/// sensitive to deviations in the tails. A significant p-value suggests the
/// residuals are not normal. The decoded inputs are forwarded to
/// `diagnostics::anderson_darling_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::anderson_darling_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn anderson_darling_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::anderson_darling_test(&response, &predictors).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Anderson-Darling test result"))
        },
    )
}
837
838// ============================================================================
839// Cook's Distance (WASM wrapper)
840// ============================================================================
841
/// WASM entry point for Cook's distance, an influence measure.
///
/// Cook's distance quantifies how strongly each observation influences the
/// fitted model by comparing coefficient estimates with and without that
/// observation. It is an influence measure, not a hypothesis test, so there is
/// no p-value. The decoded inputs are forwarded to
/// `diagnostics::cooks_distance_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::cooks_distance_test`,
/// described upstream as containing:
/// - one Cook's distance per observation
/// - thresholds for identifying influential observations
/// - indices of potentially influential observations
/// - interpretation and guidance
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the computation reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn cooks_distance_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };

    diagnostics::cooks_distance_test(&response, &predictors).map_or_else(
        |calc_err| error_json(&calc_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize Cook's distance result"))
        },
    )
}
887
/// WASM entry point for the RESET test of model specification.
///
/// RESET (Regression Specification Error Test) checks whether the model is
/// correctly specified by testing if extra terms (powers of the fitted values,
/// of the regressors, or of the first principal component) significantly
/// improve the fit. The decoded inputs are forwarded to
/// `diagnostics::reset_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `powers_json` - JSON array of non-negative integer powers (e.g., `[2, 3]` for ŷ², ŷ³)
/// * `type_` - `"regressor"` or `"princomp"` (case-insensitive); any other
///   value, including `"fitted"`, selects fitted-value powers
///
/// # Returns
///
/// JSON serialization of the output of `diagnostics::reset_test`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or serialization fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn reset_test(y_json: &str, x_vars_json: &str, powers_json: &str, type_: &str) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
        Ok(values) => values,
    };
    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
        Ok(columns) => columns,
    };
    let exponents = match serde_json::from_str::<Vec<usize>>(powers_json) {
        Err(parse_err) => return error_json(&format!("Failed to parse powers: {}", parse_err)),
        Ok(list) => list,
    };

    // Unrecognised type names deliberately fall through to the fitted variant.
    let requested = type_.to_lowercase();
    let term_kind = if requested == "regressor" {
        diagnostics::ResetType::Regressor
    } else if requested == "princomp" {
        diagnostics::ResetType::PrincipalComponent
    } else {
        diagnostics::ResetType::Fitted
    };

    diagnostics::reset_test(&response, &predictors, &exponents, term_kind).map_or_else(
        |test_err| error_json(&test_err.to_string()),
        |report| {
            serde_json::to_string(&report)
                .unwrap_or_else(|_| error_json("Failed to serialize RESET test result"))
        },
    )
}
943
/// Runs the Breusch-Godfrey test for higher-order serial correlation via WASM.
///
/// Unlike the Durbin-Watson test, which only detects first-order autocorrelation,
/// the Breusch-Godfrey test can detect serial correlation at any lag order.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `order` - Maximum order of serial correlation to test
/// * `test_type` - `"f"` (matched case-insensitively) selects the F statistic; any
///   other value, including `"chisq"`, selects the chi-squared statistic
///
/// # Returns
///
/// JSON string containing test statistic, p-value, degrees of freedom, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, or the underlying test reports an error.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn breusch_godfrey_test(
    y_json: &str,
    x_vars_json: &str,
    order: usize,
    test_type: &str,
) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Ok(values) => values,
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
    };

    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Ok(columns) => columns,
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
    };

    // Chi-squared is the fallback for every value other than "f".
    let statistic_kind = if test_type.to_lowercase() == "f" {
        diagnostics::BGTestType::F
    } else {
        diagnostics::BGTestType::Chisq
    };

    match diagnostics::breusch_godfrey_test(&response, &predictors, order, statistic_kind) {
        Err(test_err) => error_json(&test_err.to_string()),
        Ok(report) => match serde_json::to_string(&report) {
            Ok(json) => json,
            Err(_) => error_json("Failed to serialize Breusch-Godfrey test result"),
        },
    }
}
992
993// ============================================================================
994// Regularized Regression WASM Wrappers
995// ============================================================================
996
/// Fits a Ridge regression via WASM.
///
/// Ridge regression adds an L2 penalty to the coefficients, which helps with
/// multicollinearity and overfitting. The intercept is never penalized.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `_variable_names` - JSON array of variable names (accepted but not read here)
/// * `lambda` - Regularization strength (>= 0, typical range 0.01 to 100)
/// * `standardize` - Whether to standardize predictors (recommended: true)
///
/// # Returns
///
/// JSON string containing:
/// - `lambda` - Lambda value used
/// - `intercept` - Intercept coefficient
/// - `coefficients` - Slope coefficients
/// - `fitted_values` - Predictions on training data
/// - `residuals` - Residuals (y - fitted_values)
/// - `df` - Effective degrees of freedom
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, there are not more observations than predictors plus one, a predictor
/// array differs in length from `y`, or the fit itself reports an error.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn ridge_regression(
    y_json: &str,
    x_vars_json: &str,
    _variable_names: &str,
    lambda: f64,
    standardize: bool,
) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Ok(values) => values,
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
    };

    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Ok(columns) => columns,
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
    };

    let n_obs = response.len();
    let n_pred = predictors.len();
    let n_cols = n_pred + 1; // predictors plus the intercept column

    if n_obs <= n_cols {
        return error_json(&format!(
            "Insufficient data: need at least {} observations for {} predictors",
            n_cols + 1,
            n_pred
        ));
    }

    // Report the first predictor whose length disagrees with y.
    if let Some((idx, column)) = predictors
        .iter()
        .enumerate()
        .find(|(_, column)| column.len() != n_obs)
    {
        return error_json(&format!(
            "x_vars[{}] has {} elements, expected {}",
            idx,
            column.len(),
            n_obs
        ));
    }

    // Each observation occupies one contiguous run of `n_cols` cells; cell 0 keeps
    // its initial 1.0 (intercept) and the remaining cells take the predictor values.
    let mut design = vec![1.0; n_obs * n_cols];
    for (row, cells) in design.chunks_exact_mut(n_cols).enumerate() {
        for (cell, column) in cells[1..].iter_mut().zip(&predictors) {
            *cell = column[row];
        }
    }
    let x = linalg::Matrix::new(n_obs, n_cols, design);

    let options = regularized::ridge::RidgeFitOptions {
        lambda,
        intercept: true,
        standardize,
        max_iter: 100000,
        tol: 1e-7,
        warm_start: None,
        weights: None,
    };

    match regularized::ridge::ridge_fit(&x, &response, &options) {
        Err(fit_err) => error_json(&fit_err.to_string()),
        Ok(fit) => match serde_json::to_string(&fit) {
            Ok(json) => json,
            Err(_) => error_json("Failed to serialize ridge regression result"),
        },
    }
}
1094
/// Fits a Lasso regression via WASM.
///
/// Lasso regression adds an L1 penalty to the coefficients, which performs
/// automatic variable selection by shrinking some coefficients to exactly zero.
/// The intercept is never penalized.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `_variable_names` - JSON array of variable names (accepted but not read here)
/// * `lambda` - Regularization strength (>= 0, typical range 0.01 to 10)
/// * `standardize` - Whether to standardize predictors (recommended: true)
/// * `max_iter` - Maximum coordinate descent iterations
/// * `tol` - Convergence tolerance
///
/// # Returns
///
/// JSON string containing:
/// - `lambda` - Lambda value used
/// - `intercept` - Intercept coefficient
/// - `coefficients` - Slope coefficients (some may be exactly zero)
/// - `fitted_values` - Predictions on training data
/// - `residuals` - Residuals (y - fitted_values)
/// - `n_nonzero` - Number of non-zero coefficients (excluding intercept)
/// - `iterations` - Number of coordinate descent iterations
/// - `converged` - Whether the algorithm converged
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, there are not more observations than predictors plus one, a predictor
/// array differs in length from `y`, or the fit itself reports an error.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn lasso_regression(
    y_json: &str,
    x_vars_json: &str,
    _variable_names: &str,
    lambda: f64,
    standardize: bool,
    max_iter: usize,
    tol: f64,
) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Ok(values) => values,
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
    };

    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Ok(columns) => columns,
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
    };

    let n_obs = response.len();
    let n_pred = predictors.len();
    let n_cols = n_pred + 1; // predictors plus the intercept column

    if n_obs <= n_cols {
        return error_json(&format!(
            "Insufficient data: need at least {} observations for {} predictors",
            n_cols + 1,
            n_pred
        ));
    }

    // Report the first predictor whose length disagrees with y.
    if let Some((idx, column)) = predictors
        .iter()
        .enumerate()
        .find(|(_, column)| column.len() != n_obs)
    {
        return error_json(&format!(
            "x_vars[{}] has {} elements, expected {}",
            idx,
            column.len(),
            n_obs
        ));
    }

    // Each observation occupies one contiguous run of `n_cols` cells; cell 0 keeps
    // its initial 1.0 (intercept) and the remaining cells take the predictor values.
    let mut design = vec![1.0; n_obs * n_cols];
    for (row, cells) in design.chunks_exact_mut(n_cols).enumerate() {
        for (cell, column) in cells[1..].iter_mut().zip(&predictors) {
            *cell = column[row];
        }
    }
    let x = linalg::Matrix::new(n_obs, n_cols, design);

    // Fields not listed here keep the library defaults.
    let options = regularized::lasso::LassoFitOptions {
        lambda,
        intercept: true,
        standardize,
        max_iter,
        tol,
        ..Default::default()
    };

    match regularized::lasso::lasso_fit(&x, &response, &options) {
        Err(fit_err) => error_json(&fit_err.to_string()),
        Ok(fit) => match serde_json::to_string(&fit) {
            Ok(json) => json,
            Err(_) => error_json("Failed to serialize lasso regression result"),
        },
    }
}
1198
/// Fits an Elastic Net regression via WASM.
///
/// Elastic Net combines L1 (Lasso) and L2 (Ridge) penalties.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `_variable_names` - JSON array of variable names (accepted but not read here)
/// * `lambda` - Regularization strength (>= 0)
/// * `alpha` - Elastic net mixing parameter (0 = Ridge, 1 = Lasso)
/// * `standardize` - Whether to standardize predictors (recommended: true)
/// * `max_iter` - Maximum coordinate descent iterations
/// * `tol` - Convergence tolerance
///
/// # Returns
///
/// JSON string containing regression results (same fields as Lasso).
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, there are not more observations than predictors plus one, a predictor
/// array differs in length from `y`, or the fit itself reports an error.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
#[allow(clippy::too_many_arguments)] // flat argument list is the exported WASM signature
pub fn elastic_net_regression(
    y_json: &str,
    x_vars_json: &str,
    _variable_names: &str,
    lambda: f64,
    alpha: f64,
    standardize: bool,
    max_iter: usize,
    tol: f64,
) -> String {
    if let Err(domain_err) = check_domain() {
        return error_to_json(&domain_err);
    }

    let response = match serde_json::from_str::<Vec<f64>>(y_json) {
        Ok(values) => values,
        Err(parse_err) => return error_json(&format!("Failed to parse y: {}", parse_err)),
    };

    let predictors = match serde_json::from_str::<Vec<Vec<f64>>>(x_vars_json) {
        Ok(columns) => columns,
        Err(parse_err) => return error_json(&format!("Failed to parse x_vars: {}", parse_err)),
    };

    let n_obs = response.len();
    let n_pred = predictors.len();
    let n_cols = n_pred + 1; // predictors plus the intercept column

    if n_obs <= n_cols {
        return error_json(&format!(
            "Insufficient data: need at least {} observations for {} predictors",
            n_cols + 1,
            n_pred
        ));
    }

    // Report the first predictor whose length disagrees with y.
    if let Some((idx, column)) = predictors
        .iter()
        .enumerate()
        .find(|(_, column)| column.len() != n_obs)
    {
        return error_json(&format!(
            "x_vars[{}] has {} elements, expected {}",
            idx,
            column.len(),
            n_obs
        ));
    }

    // Each observation occupies one contiguous run of `n_cols` cells; cell 0 keeps
    // its initial 1.0 (intercept) and the remaining cells take the predictor values.
    let mut design = vec![1.0; n_obs * n_cols];
    for (row, cells) in design.chunks_exact_mut(n_cols).enumerate() {
        for (cell, column) in cells[1..].iter_mut().zip(&predictors) {
            *cell = column[row];
        }
    }
    let x = linalg::Matrix::new(n_obs, n_cols, design);

    // Fields not listed here keep the library defaults.
    let options = regularized::elastic_net::ElasticNetOptions {
        lambda,
        alpha,
        intercept: true,
        standardize,
        max_iter,
        tol,
        ..Default::default()
    };

    match regularized::elastic_net::elastic_net_fit(&x, &response, &options) {
        Err(fit_err) => error_json(&fit_err.to_string()),
        Ok(fit) => match serde_json::to_string(&fit) {
            Ok(json) => json,
            Err(_) => error_json("Failed to serialize elastic net regression result"),
        },
    }
}
1296
/// Generates a lambda path for regularized regression via WASM.
///
/// Creates a logarithmically-spaced sequence of lambda values from lambda_max
/// (where all penalized coefficients are zero) down to lambda_min. This is
/// useful for fitting regularization paths and selecting optimal lambda via
/// cross-validation.
///
/// Predictors are standardized (sample standard deviation, `n - 1` denominator)
/// and `y` is centered before the path is computed. A predictor whose standard
/// deviation is not above `1e-10` is replaced by a column of zeros.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `n_lambda` - Number of lambda values to generate; values below 1 are raised to 1
/// * `lambda_min_ratio` - Ratio for smallest lambda; any value that is not strictly
///   positive lets the path routine pick its own default
///
/// # Returns
///
/// JSON string containing:
/// - `lambda_path` - Array of lambda values in decreasing order
/// - `lambda_max` - Maximum lambda value
/// - `lambda_min` - Minimum lambda value
/// - `n_lambda` - Number of lambda values
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, `y` is empty, or a predictor array differs in length from `y`.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn make_lambda_path(
    y_json: &str,
    x_vars_json: &str,
    n_lambda: usize,
    lambda_min_ratio: f64,
) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    let y: Vec<f64> = match serde_json::from_str(y_json) {
        Ok(v) => v,
        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
    };

    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
        Ok(v) => v,
        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
    };

    let n = y.len();
    let p = x_vars.len();
    let cols = p + 1;

    // With no observations the means are 0/0 and `n - 1` below would underflow
    // `usize` (a panic in debug builds), so reject empty input explicitly.
    if n == 0 {
        return error_json("Insufficient data: y must contain at least one observation");
    }

    // Standardized design matrix, one contiguous run of `cols` cells per
    // observation. Cell 0 of each run keeps its initial 1.0 (intercept column).
    let mut standardized = vec![1.0; n * cols];
    for (j, column) in x_vars.iter().enumerate() {
        if column.len() != n {
            return error_json(&format!(
                "x_vars[{}] has {} elements, expected {}",
                j,
                column.len(),
                n
            ));
        }

        let mean = column.iter().sum::<f64>() / n as f64;
        let sum_sq = column.iter().map(|&v| (v - mean).powi(2)).sum::<f64>();
        // For n == 1 this is 0/0 = NaN, which fails the threshold test below and
        // therefore zeroes the column, exactly like a constant predictor.
        let std_dev = (sum_sq / (n - 1) as f64).sqrt();
        let has_spread = std_dev > 1e-10;

        for (i, &v) in column.iter().enumerate() {
            standardized[i * cols + j + 1] = if has_spread {
                (v - mean) / std_dev
            } else {
                0.0
            };
        }
    }
    let x_standardized = linalg::Matrix::new(n, cols, standardized);

    // Center y
    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
    let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();

    let options = regularized::path::LambdaPathOptions {
        nlambda: n_lambda.max(1),
        lambda_min_ratio: if lambda_min_ratio > 0.0 {
            Some(lambda_min_ratio)
        } else {
            None
        },
        alpha: 1.0, // Lasso
        ..Default::default()
    };

    // NOTE(review): the trailing `None, Some(0)` arguments are passed through
    // unchanged; `Some(0)` presumably identifies the intercept column — confirm
    // against `regularized::path::make_lambda_path`.
    let lambda_path =
        regularized::path::make_lambda_path(&x_standardized, &y_centered, &options, None, Some(0));

    let lambda_max = lambda_path.first().copied().unwrap_or(0.0);
    let lambda_min = lambda_path.last().copied().unwrap_or(0.0);

    // Return as JSON (note: infinity serializes as null in JSON, handled in JS)
    let result = serde_json::json!({
        "lambda_path": lambda_path,
        "lambda_max": lambda_max,
        "lambda_min": lambda_min,
        "n_lambda": lambda_path.len()
    });

    result.to_string()
}
1440
1441// ============================================================================
1442// Statistical Utility Functions (WASM wrappers)
1443// ============================================================================
1444
/// Evaluates the cumulative distribution function of Student's t-distribution.
///
/// Returns P(T ≤ t) for a t-distribution with the given degrees of freedom.
///
/// # Arguments
///
/// * `t` - t-statistic value
/// * `df` - Degrees of freedom
///
/// # Returns
///
/// The CDF value, or `NaN` if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_t_cdf(t: f64, df: f64) -> f64 {
    match check_domain() {
        Ok(_) => student_t_cdf(t, df),
        Err(_) => f64::NAN,
    }
}
1466
/// Looks up the critical t-value for a given significance level.
///
/// Returns the t-value such that the area under the t-distribution curve
/// to the right equals alpha/2 (two-tailed test).
///
/// # Arguments
///
/// * `alpha` - Significance level (typically 0.05 for 95% confidence)
/// * `df` - Degrees of freedom
///
/// # Returns
///
/// The critical t-value, or `NaN` if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_t_critical(alpha: f64, df: f64) -> f64 {
    match check_domain() {
        // The quantile helper takes the degrees of freedom first, then alpha.
        Ok(_) => core::t_critical_quantile(df, alpha),
        Err(_) => f64::NAN,
    }
}
1489
/// Evaluates the inverse of the standard normal CDF (probit function).
///
/// Returns the z-score such that P(Z ≤ z) = p for a standard normal distribution.
///
/// # Arguments
///
/// * `p` - Probability (0 < p < 1)
///
/// # Returns
///
/// The z-score, or `NaN` if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_normal_inverse(p: f64) -> f64 {
    match check_domain() {
        Ok(_) => normal_inverse_cdf(p),
        Err(_) => f64::NAN,
    }
}
1510
1511// ============================================================================
1512// Statistical Utilities (WASM-only)
1513// ============================================================================
1514
/// Computes the arithmetic mean of a JSON array of f64 values.
///
/// # Arguments
///
/// * `data_json` - JSON string representing an array of f64 values
///
/// # Returns
///
/// JSON string with the mean, or "null" if the domain check fails, the input
/// cannot be parsed, or the result cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn stats_mean(data_json: String) -> String {
    if check_domain().is_err() {
        return "null".to_string();
    }

    serde_json::from_str::<Vec<f64>>(&data_json)
        .ok()
        .and_then(|values| serde_json::to_string(&stats::mean(&values)).ok())
        .unwrap_or_else(|| "null".to_string())
}
1538
/// Computes the sample standard deviation of a JSON array of f64 values.
///
/// Uses the (n-1) denominator for unbiased estimation.
///
/// # Arguments
///
/// * `data_json` - JSON string representing an array of f64 values
///
/// # Returns
///
/// JSON string with the standard deviation, or "null" if the domain check fails,
/// the input cannot be parsed, or the result cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn stats_stddev(data_json: String) -> String {
    if check_domain().is_err() {
        return "null".to_string();
    }

    serde_json::from_str::<Vec<f64>>(&data_json)
        .ok()
        .and_then(|values| serde_json::to_string(&stats::stddev(&values)).ok())
        .unwrap_or_else(|| "null".to_string())
}
1564
/// Computes the sample variance of a JSON array of f64 values.
///
/// Uses the (n-1) denominator for unbiased estimation.
///
/// # Arguments
///
/// * `data_json` - JSON string representing an array of f64 values
///
/// # Returns
///
/// JSON string with the variance, or "null" if the domain check fails, the
/// input cannot be parsed, or the result cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn stats_variance(data_json: String) -> String {
    if check_domain().is_err() {
        return "null".to_string();
    }

    serde_json::from_str::<Vec<f64>>(&data_json)
        .ok()
        .and_then(|values| serde_json::to_string(&stats::variance(&values)).ok())
        .unwrap_or_else(|| "null".to_string())
}
1590
/// Computes the median of a JSON array of f64 values.
///
/// # Arguments
///
/// * `data_json` - JSON string representing an array of f64 values
///
/// # Returns
///
/// JSON string with the median, or "null" if the domain check fails, the input
/// cannot be parsed, or the result cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn stats_median(data_json: String) -> String {
    if check_domain().is_err() {
        return "null".to_string();
    }

    serde_json::from_str::<Vec<f64>>(&data_json)
        .ok()
        .and_then(|values| serde_json::to_string(&stats::median(&values)).ok())
        .unwrap_or_else(|| "null".to_string())
}
1614
/// Computes a quantile of a JSON array of f64 values.
///
/// # Arguments
///
/// * `data_json` - JSON string representing an array of f64 values
/// * `q` - Quantile to calculate (0.0 to 1.0)
///
/// # Returns
///
/// JSON string with the quantile value, or "null" if the domain check fails,
/// the input cannot be parsed, or the result cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn stats_quantile(data_json: String, q: f64) -> String {
    if check_domain().is_err() {
        return "null".to_string();
    }

    serde_json::from_str::<Vec<f64>>(&data_json)
        .ok()
        .and_then(|values| serde_json::to_string(&stats::quantile(&values, q)).ok())
        .unwrap_or_else(|| "null".to_string())
}
1639
/// Computes the correlation coefficient between two JSON arrays of f64 values.
///
/// # Arguments
///
/// * `x_json` - JSON string representing the first array of f64 values
/// * `y_json` - JSON string representing the second array of f64 values
///
/// # Returns
///
/// JSON string with the correlation coefficient, or "null" if the domain check
/// fails, either input cannot be parsed, or the result cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn stats_correlation(x_json: String, y_json: String) -> String {
    if check_domain().is_err() {
        return "null".to_string();
    }

    let decode = |raw: &str| serde_json::from_str::<Vec<f64>>(raw).ok();

    match (decode(&x_json), decode(&y_json)) {
        (Some(x), Some(y)) => serde_json::to_string(&stats::correlation(&x, &y))
            .unwrap_or_else(|_| "null".to_string()),
        // At least one side failed to parse.
        _ => "null".to_string(),
    }
}
1669
1670// ============================================================================
1671// Domain Check (WASM-only)
1672// ============================================================================
1673//
1674// By default, all domains are allowed. To enable domain restriction, set the
1675// LINREG_DOMAIN_RESTRICT environment variable at build time:
1676//
1677// LINREG_DOMAIN_RESTRICT=example.com,yoursite.com wasm-pack build
1678//
1679// Example for jesse-anderson.net:
1680// LINREG_DOMAIN_RESTRICT=jesse-anderson.net,tools.jesse-anderson.net,localhost,127.0.0.1 wasm-pack build
1681//
1682// This allows downstream users to use the library without modification while
1683// still providing domain restriction as an opt-in security feature.
1684
/// Returns `true` when `hostname` matches an entry of the comma-separated
/// `allowed` list.
///
/// Entries are trimmed of surrounding whitespace. Blank entries (for example the
/// one produced by a trailing comma) are ignored so that an empty hostname can
/// never be authorized by accident. Comparison is ASCII case-insensitive.
// Also compiled for native test builds so the matching rules can be unit-tested.
#[cfg(any(feature = "wasm", test))]
// Its only caller, `check_domain`, exists solely under the `wasm` feature.
#[cfg_attr(not(feature = "wasm"), allow(dead_code))]
fn hostname_is_allowed(allowed: &str, hostname: &str) -> bool {
    allowed
        .split(',')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .any(|entry| entry.eq_ignore_ascii_case(hostname))
}

/// Enforces the optional build-time domain restriction.
///
/// When `LINREG_DOMAIN_RESTRICT` was unset or empty at build time, every caller
/// is allowed. Otherwise the hostname of the current browser window must match
/// one of the comma-separated entries of that variable.
///
/// # Errors
///
/// Returns `Error::DomainCheck` when restriction is enabled and there is no
/// window, the hostname cannot be read, or the hostname is not in the list.
#[cfg(feature = "wasm")]
fn check_domain() -> Result<()> {
    // Read allowed domains from build-time environment variable
    match option_env!("LINREG_DOMAIN_RESTRICT") {
        Some(domains) if !domains.is_empty() => {
            // Domain restriction is enabled
            let window = web_sys::window()
                .ok_or_else(|| Error::DomainCheck("No window found".to_string()))?;
            let hostname = window
                .location()
                .hostname()
                .map_err(|_| Error::DomainCheck("No hostname found".to_string()))?;

            if hostname_is_allowed(domains, &hostname) {
                Ok(())
            } else {
                Err(Error::DomainCheck(format!(
                    "Unauthorized domain: {}. Allowed: {}",
                    hostname, domains
                )))
            }
        },
        // No restriction - allow all domains
        _ => Ok(()),
    }
}
1717
1718// ============================================================================
// Test Functions (native unit tests and WASM smoke-test exports)
1720// ============================================================================
1721
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared 25-observation housing dataset used by several tests.
    ///
    /// Returns `(y, [square_feet, bedrooms, age], variable_names)`.
    fn housing_fixture() -> (Vec<f64>, Vec<Vec<f64>>, Vec<String>) {
        let y = vec![
            245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9,
            367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7,
            267.9,
        ];

        let square_feet = vec![
            1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
            2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
            1250.0, 1700.0, 850.0, 2350.0, 1400.0,
        ];
        let bedrooms = vec![
            3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0,
            3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
        ];
        let age = vec![
            15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0,
            22.0, 1.0, 16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
        ];

        let names = vec![
            "Intercept".to_string(),
            "Square_Feet".to_string(),
            "Bedrooms".to_string(),
            "Age".to_string(),
        ];

        (y, vec![square_feet, bedrooms, age], names)
    }

    /// The built-in housing self-check must succeed.
    #[test]
    fn verify_housing_regression_integrity() {
        if let Err(e) = test_housing_regression_native() {
            panic!("Regression test failed: {}", e);
        }
    }

    /// Test that test_housing_regression_native produces valid JSON
    #[test]
    fn test_housing_regression_json_output() {
        let result = test_housing_regression_native().unwrap();
        // Should be valid JSON
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        // Should have status field
        assert!(parsed.get("status").is_some());
        // Status should be PASS (we control the test data)
        assert_eq!(parsed["status"], "PASS");
    }

    /// Test housing regression with actual R reference values
    #[test]
    fn test_housing_regression_coefficients() {
        let (y, x_vars, names) = housing_fixture();
        let result = core::ols_regression(&y, &x_vars, &names).unwrap();

        // Check against R results
        let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
        let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];

        let tolerance = 1e-4;
        for (i, (&want_coef, &want_se)) in
            expected_coeffs.iter().zip(expected_std_errs.iter()).enumerate()
        {
            assert!(
                (result.coefficients[i] - want_coef).abs() < tolerance,
                "coeff[{}] differs: got {}, expected {}",
                i,
                result.coefficients[i],
                want_coef
            );
            assert!(
                (result.std_errors[i] - want_se).abs() < tolerance,
                "std_err[{}] differs: got {}, expected {}",
                i,
                result.std_errors[i],
                want_se
            );
        }
    }

    /// Test R-squared calculation in housing regression
    #[test]
    fn test_housing_regression_r_squared() {
        let result = test_housing_regression_native().unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(parsed["status"], "PASS");

        // Actually inspect R²: for the housing fixture it must lie in [0, 1].
        let (y, x_vars, names) = housing_fixture();
        let fit = core::ols_regression(&y, &x_vars, &names).unwrap();
        assert!(
            fit.r_squared >= 0.0 && fit.r_squared <= 1.0,
            "R² should be within [0, 1], got {}",
            fit.r_squared
        );
    }

    /// Test that housing regression handles all expected output fields
    #[test]
    fn test_housing_regression_comprehensive() {
        let y = vec![
            245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1,
        ];
        let x1 = vec![
            1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
        ];
        let x2 = vec![3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0];

        let result = core::ols_regression(
            &y,
            &[x1, x2],
            &["Intercept".into(), "X1".into(), "X2".into()],
        )
        .unwrap();

        // Verify expected output fields exist
        assert!(!result.coefficients.is_empty());
        assert!(!result.std_errors.is_empty());
        assert!(!result.t_stats.is_empty());
        assert!(!result.p_values.is_empty());
        assert!(result.r_squared >= 0.0 && result.r_squared <= 1.0);
        assert_eq!(result.residuals.len(), y.len());
    }

    /// Test error handling when insufficient data is provided
    #[test]
    fn test_housing_regression_insufficient_data() {
        let y = vec![245.5, 312.8]; // Only 2 observations
        let x1 = vec![1200.0, 1800.0];
        let x2 = vec![3.0, 4.0];

        let result = core::ols_regression(
            &y,
            &[x1, x2],
            &["Intercept".into(), "X1".into(), "X2".into()],
        );
        assert!(result.is_err());
    }

    /// Test that the housing fit yields finite coefficients and finite, positive
    /// standard errors
    #[test]
    fn test_housing_regression_tolerance_check() {
        let (y, x_vars, names) = housing_fixture();
        let result = core::ols_regression(&y, &x_vars, &names).unwrap();

        // Verify all coefficient values are finite
        for coef in &result.coefficients {
            assert!(coef.is_finite(), "Coefficient should be finite");
        }
        // Verify all standard errors are positive and finite
        for se in &result.std_errors {
            assert!(se.is_finite(), "Standard error should be finite");
            assert!(*se > 0.0, "Standard error should be positive, got {}", se);
        }
    }
}
1890
/// Smoke-test export used to confirm the WASM module is callable.
///
/// Returns a fixed success message confirming the WASM module loaded correctly.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn test() -> String {
    match check_domain() {
        Err(domain_err) => error_to_json(&domain_err),
        Ok(_) => String::from("Rust WASM is working!"),
    }
}
1906
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Reports the version of this library.
///
/// The value is the `CARGO_PKG_VERSION` string captured at compile time
/// (e.g., "0.1.0").
///
/// # Errors
///
/// If `check_domain` fails, the string produced by `error_to_json` for that
/// error is returned instead of the version.
pub fn get_version() -> String {
    check_domain().map_or_else(
        |e| error_to_json(&e),
        |_| String::from(env!("CARGO_PKG_VERSION")),
    )
}
1922
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for t-critical value computation.
///
/// Passes `df` and `alpha` straight to `core::t_critical_quantile` and returns
/// a JSON object with the keys `df`, `alpha`, and `t_critical`.
///
/// Any value that is not finite (`NaN` or ±infinity) is written as `null`,
/// because JSON has no representation for those values and the output must
/// remain parseable.
///
/// # Errors
///
/// If `check_domain` fails, the string produced by `error_to_json` for that
/// error is returned instead.
pub fn test_t_critical(df: f64, alpha: f64) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    // `{}` would print `NaN` / `inf` for non-finite floats, which is not valid
    // JSON; finite values keep exactly the same `Display` formatting as before.
    let json_number = |value: f64| -> String {
        if value.is_finite() {
            value.to_string()
        } else {
            String::from("null")
        }
    };

    let t_crit = core::t_critical_quantile(df, alpha);
    format!(
        r#"{{"df": {}, "alpha": {}, "t_critical": {}}}"#,
        json_number(df),
        json_number(alpha),
        json_number(t_crit)
    )
}
1942
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for confidence interval computation.
///
/// Obtains a critical value from `core::t_critical_quantile(df, alpha)` and
/// returns a JSON object with the keys `lower` and `upper`, computed as
/// `coef - t_crit * se` and `coef + t_crit * se`.
///
/// A bound that is not finite (`NaN` or ±infinity) is written as `null`,
/// because JSON has no representation for those values and the output must
/// remain parseable.
///
/// # Errors
///
/// If `check_domain` fails, the string produced by `error_to_json` for that
/// error is returned instead.
pub fn test_ci(coef: f64, se: f64, df: f64, alpha: f64) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    // `{}` would print `NaN` / `inf` for non-finite floats, which is not valid
    // JSON; finite values keep exactly the same `Display` formatting as before.
    let json_number = |value: f64| -> String {
        if value.is_finite() {
            value.to_string()
        } else {
            String::from("null")
        }
    };

    let t_crit = core::t_critical_quantile(df, alpha);
    format!(
        r#"{{"lower": {}, "upper": {}}}"#,
        json_number(coef - t_crit * se),
        json_number(coef + t_crit * se)
    )
}
1963
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for R accuracy validation.
///
/// Evaluates two statistical helpers at fixed inputs and reports the results
/// as JSON so the caller can compare them against reference values:
///
/// - `two_tail_p`: `core::two_tailed_p_value(1.6717, 21.0)`
/// - `qt_975`: `core::t_critical_quantile(21.0, 0.05)`
///
/// No comparison is performed here; only the computed values are returned.
///
/// # Errors
///
/// If `check_domain` fails, the string produced by `error_to_json` for that
/// error is returned instead.
pub fn test_r_accuracy() -> String {
    match check_domain() {
        Err(e) => error_to_json(&e),
        Ok(_) => {
            let two_tail_p = core::two_tailed_p_value(1.6717, 21.0);
            let qt_975 = core::t_critical_quantile(21.0, 0.05);
            format!(
                r#"{{"two_tail_p": {}, "qt_975": {}}}"#,
                two_tail_p, qt_975
            )
        }
    }
}
1983
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for regression validation against R reference values.
///
/// Delegates to `test_housing_regression_native` and returns its JSON report
/// (status `"PASS"` or `"FAIL"`). If that function returns an error, the result
/// is a JSON object with status `"ERROR"` and the error's message under `error`.
///
/// # Errors
///
/// If `check_domain` fails, the string produced by `error_to_json` for that
/// error is returned instead.
pub fn test_housing_regression() -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    test_housing_regression_native().unwrap_or_else(|e| {
        serde_json::json!({ "status": "ERROR", "error": e.to_string() }).to_string()
    })
}
2004
// Housing-dataset validation routine. Compiled for test builds and whenever the
// `wasm` feature is enabled; the WASM export `test_housing_regression` wraps it.
//
// Fits an OLS model on a fixed 25-observation dataset and compares the first four
// coefficients and standard errors against hard-coded reference values with an
// absolute tolerance of 1e-4. Returns a JSON string: `{"status":"PASS"}` when all
// values agree, otherwise status "FAIL" plus a `mismatches` list. A regression
// error is propagated through the `Result`.
#[cfg(any(test, feature = "wasm"))]
fn test_housing_regression_native() -> Result<String> {
    let y = vec![
        245.5, 312.8, 198.4, 425.6, 278.9,
        356.2, 189.5, 512.3, 234.7, 298.1,
        445.8, 167.9, 367.4, 289.6, 198.2,
        478.5, 256.3, 334.7, 178.5, 398.9,
        223.4, 312.5, 156.8, 423.7, 267.9,
    ];

    let square_feet = vec![
        1200.0, 1800.0, 950.0, 2400.0, 1450.0,
        2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
        2200.0, 900.0, 1950.0, 1500.0, 1050.0,
        2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
        1250.0, 1700.0, 850.0, 2350.0, 1400.0,
    ];
    let bedrooms = vec![
        3.0, 4.0, 2.0, 4.0, 3.0,
        4.0, 2.0, 5.0, 3.0, 3.0,
        4.0, 2.0, 4.0, 3.0, 2.0,
        5.0, 3.0, 4.0, 2.0, 4.0,
        3.0, 3.0, 2.0, 4.0, 3.0,
    ];
    let age = vec![
        15.0, 10.0, 25.0, 5.0, 8.0,
        12.0, 20.0, 2.0, 18.0, 7.0,
        3.0, 30.0, 6.0, 14.0, 22.0,
        1.0, 16.0, 9.0, 28.0, 4.0,
        19.0, 11.0, 35.0, 3.0, 13.0,
    ];

    let predictors = vec![square_feet, bedrooms, age];
    let labels: Vec<String> = ["Intercept", "Square_Feet", "Bedrooms", "Age"]
        .iter()
        .map(|label| label.to_string())
        .collect();

    let fit = core::ols_regression(&y, &predictors, &labels)?;

    // Reference values from R, in the same order as `labels`.
    let r_coefficients = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
    let r_std_errors = [31.18201809, 0.01875072, 10.44400198, 0.73219949];

    // Maximum absolute difference accepted between our value and the reference.
    let max_abs_diff = 1e-4;
    let mut mismatches: Vec<String> = Vec::new();

    for (i, (&want_coef, &want_se)) in r_coefficients.iter().zip(r_std_errors.iter()).enumerate() {
        let got_coef = fit.coefficients[i];
        if (got_coef - want_coef).abs() > max_abs_diff {
            mismatches.push(format!(
                "coeff[{}] differs: got {}, expected {}",
                i, got_coef, want_coef
            ));
        }

        let got_se = fit.std_errors[i];
        if (got_se - want_se).abs() > max_abs_diff {
            mismatches.push(format!(
                "std_err[{}] differs: got {}, expected {}",
                i, got_se, want_se
            ));
        }
    }

    let report = if mismatches.is_empty() {
        serde_json::json!({ "status": "PASS" })
    } else {
        serde_json::json!({ "status": "FAIL", "mismatches": mismatches })
    };
    Ok(report.to_string())
}