Skip to main content

linreg_core/
lib.rs

1//! # linreg-core
2//!
3//! A lightweight, self-contained linear regression library in pure Rust.
4//!
5//! **No external math dependencies.** All linear algebra (matrices, QR decomposition)
6//! and statistical functions (distributions, hypothesis tests) are implemented from
7//! scratch. Compiles to WebAssembly for browser use or runs as a native Rust crate.
8//!
9//! ## What This Does
10//!
11//! - **OLS Regression** — Ordinary Least Squares with numerically stable QR decomposition
12//! - **Regularized Regression** — Ridge, Lasso, and Elastic Net via coordinate descent
13//! - **Diagnostic Tests** — 8+ statistical tests for validating regression assumptions
14//! - **WASM Support** — Same API works in browsers via WebAssembly
15//!
16//! ## Quick Start
17//!
18//! ### Native Rust
19//!
20//! Add to `Cargo.toml` (no WASM overhead):
21//!
22//! ```toml
23//! [dependencies]
24//! linreg-core = { version = "0.3", default-features = false }
25//! ```
26//!
27//! ```rust
28//! use linreg_core::core::ols_regression;
29//!
30//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
31//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
32//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
33//! let names = vec!["Intercept".into(), "Temp".into(), "Pressure".into()];
34//!
35//! let result = ols_regression(&y, &[x1, x2], &names)?;
36//! println!("R²: {}", result.r_squared);
37//! println!("F-statistic: {}", result.f_statistic);
38//! # Ok::<(), linreg_core::Error>(())
39//! ```
40//!
41//! ### WebAssembly (JavaScript)
42//!
43//! ```toml
44//! [dependencies]
45//! linreg-core = "0.3"
46//! ```
47//!
48//! Build with `wasm-pack build --target web`, then use in JavaScript:
49//!
50//! ```text
51//! import init, { ols_regression } from './linreg_core.js';
52//! await init();
53//!
54//! const result = JSON.parse(ols_regression(
55//!     JSON.stringify([2.5, 3.7, 4.2, 5.1, 6.3]),
56//!     JSON.stringify([[1,2,3,4,5], [2,4,5,4,3]]),
57//!     JSON.stringify(["Intercept", "X1", "X2"])
58//! ));
59//! console.log("R²:", result.r_squared);
60//! ```
61//!
62//! ## Regularized Regression
63//!
64//! ```no_run
65//! use linreg_core::regularized::{ridge, lasso};
66//! use linreg_core::linalg::Matrix;
67//!
68//! let x = Matrix::new(100, 3, vec![0.0; 300]);
69//! let y = vec![0.0; 100];
70//!
71//! // Ridge regression (L2 penalty - shrinks coefficients, handles multicollinearity)
72//! let ridge_result = ridge::ridge_fit(&x, &y, &ridge::RidgeFitOptions {
73//!     lambda: 1.0,
74//!     intercept: true,
75//!     standardize: true,
76//!     ..Default::default()
77//! })?;
78//!
79//! // Lasso regression (L1 penalty - does variable selection by zeroing coefficients)
80//! let lasso_result = lasso::lasso_fit(&x, &y, &lasso::LassoFitOptions {
81//!     lambda: 0.1,
82//!     intercept: true,
83//!     standardize: true,
84//!     ..Default::default()
85//! })?;
86//! # Ok::<(), linreg_core::Error>(())
87//! ```
88//!
89//! ## Diagnostic Tests
90//!
91//! After fitting a model, validate its assumptions:
92//!
93//! | Test | Tests For | Use When |
94//! |------|-----------|----------|
95//! | [`rainbow_test`] | Linearity | Checking if relationships are linear |
96//! | [`harvey_collier_test`] | Functional form | Suspecting model misspecification |
97//! | [`breusch_pagan_test`] | Heteroscedasticity | Variance changes with predictors |
98//! | [`white_test`] | Heteroscedasticity | More general than Breusch-Pagan |
99//! | [`shapiro_wilk_test`] | Normality | Small to moderate samples (n ≤ 5000) |
100//! | [`jarque_bera_test`] | Normality | Large samples, skewness/kurtosis |
101//! | [`anderson_darling_test`] | Normality | Tail-sensitive, any sample size |
102//! | [`durbin_watson_test`] | Autocorrelation | Time series or ordered data |
103//! | [`cooks_distance_test`] | Influential points | Identifying high-impact observations |
104//!
105//! ```rust
106//! use linreg_core::diagnostics::{rainbow_test, breusch_pagan_test, RainbowMethod};
107//!
108//! # let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
109//! # let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
110//! # let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
111//! // Rainbow test for linearity
112//! let rainbow = rainbow_test(&y, &[x1.clone(), x2.clone()], 0.5, RainbowMethod::R)?;
113//! if rainbow.r_result.as_ref().map_or(false, |r| r.p_value < 0.05) {
114//!     println!("Warning: relationship may be non-linear");
115//! }
116//!
117//! // Breusch-Pagan test for heteroscedasticity
118//! let bp = breusch_pagan_test(&y, &[x1, x2])?;
119//! if bp.p_value < 0.05 {
120//!     println!("Warning: residuals have non-constant variance");
121//! }
122//! # Ok::<(), linreg_core::Error>(())
123//! ```
124//!
125//! ## Feature Flags
126//!
127//! | Flag | Default | Description |
128//! |------|---------|-------------|
129//! | `wasm` | Yes | Enables WASM bindings and browser support |
130//! | `validation` | No | Includes test data for validation tests |
131//!
132//! For native-only builds (smaller binary, no WASM deps):
133//!
134//! ```toml
135//! linreg-core = { version = "0.3", default-features = false }
136//! ```
137//!
138//! ## Why This Library?
139//!
140//! - **Zero dependencies** — No `nalgebra`, no `statrs`, no `ndarray`. Pure Rust.
141//! - **Validated** — Outputs match R's `lm()` and Python's `statsmodels`
142//! - **WASM-ready** — Same code runs natively and in browsers
143//! - **Small** — Core is ~2000 lines, compiles quickly
144//! - **Permissive license** — MIT OR Apache-2.0
145//!
146//! ## Module Structure
147//!
148//! - [`core`] — OLS regression, coefficients, residuals, VIF
149//! - [`regularized`] — Ridge, Lasso, Elastic Net, regularization paths
150//! - [`diagnostics`] — All diagnostic tests (linearity, heteroscedasticity, normality, autocorrelation)
151//! - [`distributions`] — Statistical distributions (t, F, χ², normal, beta, gamma)
152//! - [`linalg`] — Matrix operations, QR decomposition, linear system solver
153//! - [`error`] — Error types and Result alias
154//!
155//! ## Links
156//!
157//! - [Repository](https://github.com/jesse-anderson/linreg-core)
158//! - [Documentation](https://docs.rs/linreg-core)
159//! - [Examples](https://github.com/jesse-anderson/linreg-core/tree/main/examples)
160//!
161//! ## Disclaimer
162//!
163//! This library is under active development and has not reached 1.0 stability.
164//! While outputs are validated against R and Python implementations, **do not
165//! use this library for critical applications** (medical, financial, safety-critical
166//! systems) without independent verification. See the LICENSE for full terms.
167//! The software is provided "as is" without warranty of any kind.
168
169// Import core modules (always available)
170pub mod core;
171pub mod diagnostics;
172pub mod distributions;
173pub mod error;
174pub mod linalg;
175pub mod regularized;
176pub mod stats;
177
178// Unit tests are now in tests/unit/ directory
179// - error_tests.rs -> tests/unit/error_tests.rs
180// - core_tests.rs -> tests/unit/core_tests.rs
181// - linalg_tests.rs -> tests/unit/linalg_tests.rs
182// - validation_tests.rs -> tests/validation/main.rs
183// - diagnostics_tests.rs: disabled (references unimplemented functions)
184
185// Re-export public API (always available)
186pub use core::{RegressionOutput, VifResult};
187pub use diagnostics::{
188    BGTestType, BreuschGodfreyResult, CooksDistanceResult, DiagnosticTestResult,
189    RainbowMethod, RainbowSingleResult, RainbowTestOutput, ResetType,
190    WhiteMethod, WhiteSingleResult, WhiteTestOutput,
191};
192
193// Re-export core test functions with different names to avoid WASM conflicts
194pub use diagnostics::rainbow_test as rainbow_test_core;
195pub use diagnostics::white_test as white_test_core;
196
197pub use error::{error_json, error_to_json, Error, Result};
198
199// ============================================================================
200// WASM-specific code (only compiled when "wasm" feature is enabled)
201// ============================================================================
202
203#[cfg(feature = "wasm")]
204use wasm_bindgen::prelude::*;
205
206#[cfg(feature = "wasm")]
207use std::collections::HashSet;
208
209#[cfg(feature = "wasm")]
210use serde::Serialize;
211
212#[cfg(feature = "wasm")]
213use crate::distributions::{normal_inverse_cdf, student_t_cdf};
214
215// ============================================================================
216// CSV Parsing (WASM-only)
217// ============================================================================
218
219#[cfg(feature = "wasm")]
220#[derive(Serialize)]
221struct ParsedCsv {
222    headers: Vec<String>,
223    data: Vec<serde_json::Map<String, serde_json::Value>>,
224    numeric_columns: Vec<String>,
225}
226
227#[cfg(feature = "wasm")]
228#[wasm_bindgen]
229/// Parses CSV data and returns it as a JSON string.
230///
231/// Parses the CSV content and identifies numeric columns. Returns a JSON object
232/// with headers, data rows, and a list of numeric column names.
233///
234/// # Arguments
235///
236/// * `content` - CSV content as a string
237///
238/// # Returns
239///
240/// JSON string with structure:
241/// ```json
242/// {
243///   "headers": ["col1", "col2", ...],
244///   "data": [{"col1": 1.0, "col2": "text"}, ...],
245///   "numeric_columns": ["col1", ...]
246/// }
247/// ```
248///
249/// # Errors
250///
251/// Returns a JSON error object if parsing fails or domain check fails.
252pub fn parse_csv(content: &str) -> String {
253    if let Err(e) = check_domain() {
254        return error_to_json(&e);
255    }
256
257    let mut reader = csv::ReaderBuilder::new()
258        .has_headers(true)
259        .flexible(true)
260        .from_reader(content.as_bytes());
261
262    // Get headers
263    let headers: Vec<String> = match reader.headers() {
264        Ok(h) => h.iter().map(|s| s.to_string()).collect(),
265        Err(e) => return error_json(&format!("Failed to read headers: {}", e)),
266    };
267
268    let mut data = Vec::new();
269    let mut numeric_col_set = HashSet::new();
270
271    for result in reader.records() {
272        let record = match result {
273            Ok(r) => r,
274            Err(e) => return error_json(&format!("Failed to parse CSV record: {}", e)),
275        };
276
277        if record.len() != headers.len() {
278            continue;
279        }
280
281        let mut row_map = serde_json::Map::new();
282
283        for (i, field) in record.iter().enumerate() {
284            if i >= headers.len() {
285                continue;
286            }
287
288            let header = &headers[i];
289            let val_trimmed = field.trim();
290
291            // Try to parse as f64
292            if let Ok(num) = val_trimmed.parse::<f64>() {
293                if num.is_finite() {
294                    row_map.insert(
295                        header.clone(),
296                        serde_json::Value::Number(serde_json::Number::from_f64(num).unwrap()),
297                    );
298                    numeric_col_set.insert(header.clone());
299                    continue;
300                }
301            }
302
303            // Fallback to string
304            row_map.insert(
305                header.clone(),
306                serde_json::Value::String(val_trimmed.to_string()),
307            );
308        }
309        data.push(row_map);
310    }
311
312    let mut numeric_columns: Vec<String> = numeric_col_set.into_iter().collect();
313    numeric_columns.sort();
314
315    let output = ParsedCsv {
316        headers,
317        data,
318        numeric_columns,
319    };
320
321    serde_json::to_string(&output).unwrap_or_else(|_| error_json("Failed to serialize CSV output"))
322}
323
324// ============================================================================
325// OLS Regression WASM Wrapper
326// ============================================================================
327
328#[cfg(feature = "wasm")]
329#[wasm_bindgen]
330/// Performs OLS regression via WASM.
331///
332/// All parameters and return values are JSON-encoded strings for JavaScript
333/// interoperability. Returns regression output including coefficients,
334/// standard errors, diagnostic statistics, and VIF analysis.
335///
336/// # Arguments
337///
338/// * `y_json` - JSON array of response variable values: `[1.0, 2.0, 3.0]`
339/// * `x_vars_json` - JSON array of predictor arrays: `[[1.0, 2.0], [0.5, 1.0]]`
340/// * `variable_names` - JSON array of variable names: `["Intercept", "X1", "X2"]`
341///
342/// # Returns
343///
344/// JSON string containing the complete regression output with coefficients,
345/// standard errors, t-statistics, p-values, R², F-statistic, residuals, leverage, VIF, etc.
346///
347/// # Errors
348///
349/// Returns a JSON error object if:
350/// - JSON parsing fails
351/// - Insufficient data (n ≤ k + 1)
352/// - Matrix is singular
353/// - Domain check fails
354pub fn ols_regression(y_json: &str, x_vars_json: &str, variable_names: &str) -> String {
355    if let Err(e) = check_domain() {
356        return error_to_json(&e);
357    }
358
359    // Parse JSON input
360    let y: Vec<f64> = match serde_json::from_str(y_json) {
361        Ok(v) => v,
362        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
363    };
364
365    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
366        Ok(v) => v,
367        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
368    };
369
370    let names: Vec<String> = match serde_json::from_str(variable_names) {
371        Ok(v) => v,
372        Err(_) => vec!["Intercept".to_string()],
373    };
374
375    // Call core function
376    match core::ols_regression(&y, &x_vars, &names) {
377        Ok(output) => serde_json::to_string(&output)
378            .unwrap_or_else(|_| error_json("Failed to serialize output")),
379        Err(e) => error_json(&e.to_string()),
380    }
381}
382
383// ============================================================================
384// Diagnostic Tests WASM Wrappers
385// ============================================================================
386
387/// Performs the Rainbow test for linearity via WASM.
388///
389/// The Rainbow test checks whether the relationship between predictors and response
390/// is linear. A significant p-value suggests non-linearity.
391///
392/// # Arguments
393///
394/// * `y_json` - JSON array of response variable values
395/// * `x_vars_json` - JSON array of predictor arrays
396/// * `fraction` - Fraction of data to use in the central subset (0.0 to 1.0, typically 0.5)
397/// * `method` - Method to use: "r", "python", or "both" (case-insensitive, defaults to "r")
398///
399/// # Returns
400///
401/// JSON string containing test statistic, p-value, and interpretation.
402///
403/// # Errors
404///
405/// Returns a JSON error object if parsing fails or domain check fails.
406#[cfg(feature = "wasm")]
407#[wasm_bindgen]
408pub fn rainbow_test(y_json: &str, x_vars_json: &str, fraction: f64, method: &str) -> String {
409    if let Err(e) = check_domain() {
410        return error_to_json(&e);
411    }
412
413    let y: Vec<f64> = match serde_json::from_str(y_json) {
414        Ok(v) => v,
415        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
416    };
417
418    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
419        Ok(v) => v,
420        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
421    };
422
423    // Parse method parameter (default to "r" for R)
424    let method = match method.to_lowercase().as_str() {
425        "python" => diagnostics::RainbowMethod::Python,
426        "both" => diagnostics::RainbowMethod::Both,
427        _ => diagnostics::RainbowMethod::R, // Default to R
428    };
429
430    match diagnostics::rainbow_test(&y, &x_vars, fraction, method) {
431        Ok(output) => serde_json::to_string(&output)
432            .unwrap_or_else(|_| error_json("Failed to serialize Rainbow test result")),
433        Err(e) => error_json(&e.to_string()),
434    }
435}
436
437/// Performs the Harvey-Collier test for linearity via WASM.
438///
439/// The Harvey-Collier test checks whether the residuals exhibit a linear trend,
440/// which would indicate that the model's functional form is misspecified.
441/// A significant p-value suggests non-linearity.
442///
443/// # Arguments
444///
445/// * `y_json` - JSON array of response variable values
446/// * `x_vars_json` - JSON array of predictor arrays
447///
448/// # Returns
449///
450/// JSON string containing test statistic, p-value, and interpretation.
451///
452/// # Errors
453///
454/// Returns a JSON error object if parsing fails or domain check fails.
455#[cfg(feature = "wasm")]
456#[wasm_bindgen]
457pub fn harvey_collier_test(y_json: &str, x_vars_json: &str) -> String {
458    if let Err(e) = check_domain() {
459        return error_to_json(&e);
460    }
461
462    let y: Vec<f64> = match serde_json::from_str(y_json) {
463        Ok(v) => v,
464        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
465    };
466
467    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
468        Ok(v) => v,
469        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
470    };
471
472    match diagnostics::harvey_collier_test(&y, &x_vars) {
473        Ok(output) => serde_json::to_string(&output)
474            .unwrap_or_else(|_| error_json("Failed to serialize Harvey-Collier test result")),
475        Err(e) => error_json(&e.to_string()),
476    }
477}
478
479/// Performs the Breusch-Pagan test for heteroscedasticity via WASM.
480///
481/// The Breusch-Pagan test checks whether the variance of residuals is constant
482/// across the range of predicted values (homoscedasticity assumption).
483/// A significant p-value suggests heteroscedasticity.
484///
485/// # Arguments
486///
487/// * `y_json` - JSON array of response variable values
488/// * `x_vars_json` - JSON array of predictor arrays
489///
490/// # Returns
491///
492/// JSON string containing test statistic, p-value, and interpretation.
493///
494/// # Errors
495///
496/// Returns a JSON error object if parsing fails or domain check fails.
497#[cfg(feature = "wasm")]
498#[wasm_bindgen]
499pub fn breusch_pagan_test(y_json: &str, x_vars_json: &str) -> String {
500    if let Err(e) = check_domain() {
501        return error_to_json(&e);
502    }
503
504    let y: Vec<f64> = match serde_json::from_str(y_json) {
505        Ok(v) => v,
506        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
507    };
508
509    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
510        Ok(v) => v,
511        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
512    };
513
514    match diagnostics::breusch_pagan_test(&y, &x_vars) {
515        Ok(output) => serde_json::to_string(&output)
516            .unwrap_or_else(|_| error_json("Failed to serialize Breusch-Pagan test result")),
517        Err(e) => error_json(&e.to_string()),
518    }
519}
520
521/// Performs the White test for heteroscedasticity via WASM.
522///
523/// The White test is a more general test for heteroscedasticity that does not
524/// assume a specific form of heteroscedasticity. A significant p-value suggests
525/// that the error variance is not constant.
526///
527/// # Arguments
528///
529/// * `y_json` - JSON array of response variable values
530/// * `x_vars_json` - JSON array of predictor arrays
531/// * `method` - Method to use: "r", "python", or "both" (case-insensitive, defaults to "r")
532///
533/// # Returns
534///
535/// JSON string containing test statistic, p-value, and interpretation.
536///
537/// # Errors
538///
539/// Returns a JSON error object if parsing fails or domain check fails.
540#[cfg(feature = "wasm")]
541#[wasm_bindgen]
542pub fn white_test(y_json: &str, x_vars_json: &str, method: &str) -> String {
543    if let Err(e) = check_domain() {
544        return error_to_json(&e);
545    }
546
547    let y: Vec<f64> = match serde_json::from_str(y_json) {
548        Ok(v) => v,
549        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
550    };
551
552    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
553        Ok(v) => v,
554        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
555    };
556
557    // Parse method parameter (default to "r" for R)
558    let method = match method.to_lowercase().as_str() {
559        "python" => diagnostics::WhiteMethod::Python,
560        "both" => diagnostics::WhiteMethod::Both,
561        _ => diagnostics::WhiteMethod::R, // Default to R
562    };
563
564    match diagnostics::white_test(&y, &x_vars, method) {
565        Ok(output) => serde_json::to_string(&output)
566            .unwrap_or_else(|_| error_json("Failed to serialize White test result")),
567        Err(e) => error_json(&e.to_string()),
568    }
569}
570
571/// Performs the R method White test for heteroscedasticity via WASM.
572///
573/// This implementation matches R's `skedastic::white()` function behavior.
574/// Uses the standard QR decomposition and the R-specific auxiliary matrix
575/// structure (intercept, X, X² only - no cross-products).
576///
577/// # Arguments
578///
579/// * `y_json` - JSON array of response variable values
580/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
581///
582/// # Returns
583///
584/// JSON string containing test statistic, p-value, and interpretation.
585///
586/// # Errors
587///
588/// Returns a JSON error object if parsing fails or domain check fails.
589#[cfg(feature = "wasm")]
590#[wasm_bindgen]
591pub fn r_white_test(y_json: &str, x_vars_json: &str) -> String {
592    if let Err(e) = check_domain() {
593        return error_to_json(&e);
594    }
595
596    let y: Vec<f64> = match serde_json::from_str(y_json) {
597        Ok(v) => v,
598        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
599    };
600
601    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
602        Ok(v) => v,
603        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
604    };
605
606    match diagnostics::r_white_method(&y, &x_vars) {
607        Ok(output) => serde_json::to_string(&output)
608            .unwrap_or_else(|_| error_json("Failed to serialize R White test result")),
609        Err(e) => error_json(&e.to_string()),
610    }
611}
612
613/// Performs the Python method White test for heteroscedasticity via WASM.
614///
615/// This implementation matches Python's `statsmodels.stats.diagnostic.het_white()` function.
616/// Uses the LINPACK QR decomposition with column pivoting and the Python-specific
617/// auxiliary matrix structure (intercept, X, X², and cross-products).
618///
619/// # Arguments
620///
621/// * `y_json` - JSON array of response variable values
622/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
623///
624/// # Returns
625///
626/// JSON string containing test statistic, p-value, and interpretation.
627///
628/// # Errors
629///
630/// Returns a JSON error object if parsing fails or domain check fails.
631#[cfg(feature = "wasm")]
632#[wasm_bindgen]
633pub fn python_white_test(y_json: &str, x_vars_json: &str) -> String {
634    if let Err(e) = check_domain() {
635        return error_to_json(&e);
636    }
637
638    let y: Vec<f64> = match serde_json::from_str(y_json) {
639        Ok(v) => v,
640        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
641    };
642
643    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
644        Ok(v) => v,
645        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
646    };
647
648    match diagnostics::python_white_method(&y, &x_vars) {
649        Ok(output) => serde_json::to_string(&output)
650            .unwrap_or_else(|_| error_json("Failed to serialize Python White test result")),
651        Err(e) => error_json(&e.to_string()),
652    }
653}
654
655/// Performs the Jarque-Bera test for normality via WASM.
656///
657/// The Jarque-Bera test checks whether the residuals are normally distributed
658/// by examining skewness and kurtosis. A significant p-value suggests that
659/// the residuals deviate from normality.
660///
661/// # Arguments
662///
663/// * `y_json` - JSON array of response variable values
664/// * `x_vars_json` - JSON array of predictor arrays
665///
666/// # Returns
667///
668/// JSON string containing test statistic, p-value, and interpretation.
669///
670/// # Errors
671///
672/// Returns a JSON error object if parsing fails or domain check fails.
673#[cfg(feature = "wasm")]
674#[wasm_bindgen]
675pub fn jarque_bera_test(y_json: &str, x_vars_json: &str) -> String {
676    if let Err(e) = check_domain() {
677        return error_to_json(&e);
678    }
679
680    let y: Vec<f64> = match serde_json::from_str(y_json) {
681        Ok(v) => v,
682        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
683    };
684
685    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
686        Ok(v) => v,
687        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
688    };
689
690    match diagnostics::jarque_bera_test(&y, &x_vars) {
691        Ok(output) => serde_json::to_string(&output)
692            .unwrap_or_else(|_| error_json("Failed to serialize Jarque-Bera test result")),
693        Err(e) => error_json(&e.to_string()),
694    }
695}
696
697// ============================================================================
698// Durbin-Watson Test (WASM wrapper)
699// ============================================================================
700
701/// Performs the Durbin-Watson test for autocorrelation via WASM.
702///
703/// The Durbin-Watson test checks for autocorrelation in the residuals.
704/// Values near 2 indicate no autocorrelation, values near 0 suggest positive
705/// autocorrelation, and values near 4 suggest negative autocorrelation.
706///
707/// # Arguments
708///
709/// * `y_json` - JSON array of response variable values
710/// * `x_vars_json` - JSON array of predictor arrays
711///
712/// # Returns
713///
714/// JSON string containing the DW statistic and interpretation.
715///
716/// # Errors
717///
718/// Returns a JSON error object if parsing fails or domain check fails.
719#[cfg(feature = "wasm")]
720#[wasm_bindgen]
721pub fn durbin_watson_test(y_json: &str, x_vars_json: &str) -> String {
722    if let Err(e) = check_domain() {
723        return error_to_json(&e);
724    }
725
726    let y: Vec<f64> = match serde_json::from_str(y_json) {
727        Ok(v) => v,
728        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
729    };
730
731    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
732        Ok(v) => v,
733        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
734    };
735
736    match diagnostics::durbin_watson_test(&y, &x_vars) {
737        Ok(output) => serde_json::to_string(&output)
738            .unwrap_or_else(|_| error_json("Failed to serialize Durbin-Watson test result")),
739        Err(e) => error_json(&e.to_string()),
740    }
741}
742
743// ============================================================================
744// Shapiro-Wilk Test (WASM wrapper)
745// ============================================================================
746
747/// Performs the Shapiro-Wilk test for normality via WASM.
748///
749/// The Shapiro-Wilk test is a powerful test for normality,
750/// especially for small to moderate sample sizes (3 ≤ n ≤ 5000). It tests
751/// the null hypothesis that the residuals are normally distributed.
752///
753/// # Arguments
754///
755/// * `y_json` - JSON array of response variable values
756/// * `x_vars_json` - JSON array of predictor arrays
757///
758/// # Returns
759///
760/// JSON string containing the W statistic (ranges from 0 to 1), p-value,
761/// and interpretation.
762///
763/// # Errors
764///
765/// Returns a JSON error object if parsing fails or domain check fails.
766#[cfg(feature = "wasm")]
767#[wasm_bindgen]
768pub fn shapiro_wilk_test(y_json: &str, x_vars_json: &str) -> String {
769    if let Err(e) = check_domain() {
770        return error_to_json(&e);
771    }
772
773    let y: Vec<f64> = match serde_json::from_str(y_json) {
774        Ok(v) => v,
775        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
776    };
777
778    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
779        Ok(v) => v,
780        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
781    };
782
783    match diagnostics::shapiro_wilk_test(&y, &x_vars) {
784        Ok(output) => serde_json::to_string(&output)
785            .unwrap_or_else(|_| error_json("Failed to serialize Shapiro-Wilk test result")),
786        Err(e) => error_json(&e.to_string()),
787    }
788}
789
790/// Performs the Anderson-Darling test for normality via WASM.
791///
792/// The Anderson-Darling test checks whether the residuals are normally distributed
793/// by comparing the empirical distribution to the expected normal distribution.
794/// This test is particularly sensitive to deviations in the tails of the distribution.
795/// A significant p-value suggests that the residuals deviate from normality.
796///
797/// # Arguments
798///
799/// * `y_json` - JSON array of response variable values
800/// * `x_vars_json` - JSON array of predictor arrays
801///
802/// # Returns
803///
804/// JSON string containing the A² statistic, p-value, and interpretation.
805///
806/// # Errors
807///
808/// Returns a JSON error object if parsing fails or domain check fails.
809#[cfg(feature = "wasm")]
810#[wasm_bindgen]
811pub fn anderson_darling_test(y_json: &str, x_vars_json: &str) -> String {
812    if let Err(e) = check_domain() {
813        return error_to_json(&e);
814    }
815
816    let y: Vec<f64> = match serde_json::from_str(y_json) {
817        Ok(v) => v,
818        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
819    };
820
821    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
822        Ok(v) => v,
823        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
824    };
825
826    match diagnostics::anderson_darling_test(&y, &x_vars) {
827        Ok(output) => serde_json::to_string(&output)
828            .unwrap_or_else(|_| error_json("Failed to serialize Anderson-Darling test result")),
829        Err(e) => error_json(&e.to_string()),
830    }
831}
832
833// ============================================================================
834// Cook's Distance (WASM wrapper)
835// ============================================================================
836
837/// Computes Cook's distance for identifying influential observations via WASM.
838///
839/// Cook's distance measures how much each observation influences the regression
840/// model by comparing coefficient estimates with and without that observation.
841/// Unlike hypothesis tests, this is an influence measure - not a test with p-values.
842///
843/// # Arguments
844///
845/// * `y_json` - JSON array of response variable values
846/// * `x_vars_json` - JSON array of predictor arrays
847///
848/// # Returns
849///
850/// JSON string containing:
851/// - Vector of Cook's distances (one per observation)
852/// - Thresholds for identifying influential observations
853/// - Indices of potentially influential observations
854/// - Interpretation and guidance
855///
856/// # Errors
857///
858/// Returns a JSON error object if parsing fails or domain check fails.
859#[cfg(feature = "wasm")]
860#[wasm_bindgen]
861pub fn cooks_distance_test(y_json: &str, x_vars_json: &str) -> String {
862    if let Err(e) = check_domain() {
863        return error_to_json(&e);
864    }
865
866    let y: Vec<f64> = match serde_json::from_str(y_json) {
867        Ok(v) => v,
868        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
869    };
870
871    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
872        Ok(v) => v,
873        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
874    };
875
876    match diagnostics::cooks_distance_test(&y, &x_vars) {
877        Ok(output) => serde_json::to_string(&output)
878            .unwrap_or_else(|_| error_json("Failed to serialize Cook's distance result")),
879        Err(e) => error_json(&e.to_string()),
880    }
881}
882
883/// Performs the RESET test for model specification error via WASM.
884///
885/// The RESET (Regression Specification Error Test) test checks whether the model
886/// is correctly specified by testing if additional terms (powers of fitted values,
887/// regressors, or first principal component) significantly improve the model fit.
888///
889/// # Arguments
890///
891/// * `y_json` - JSON array of response variable values
892/// * `x_vars_json` - JSON array of predictor arrays
893/// * `powers_json` - JSON array of powers to use (e.g., [2, 3] for ŷ², ŷ³)
894/// * `type_` - Type of terms to add: "fitted", "regressor", or "princomp"
895///
896/// # Returns
897///
898/// JSON string containing the F-statistic, p-value, and interpretation.
899///
900/// # Errors
901///
902/// Returns a JSON error object if parsing fails or domain check fails.
903#[cfg(feature = "wasm")]
904#[wasm_bindgen]
905pub fn reset_test(y_json: &str, x_vars_json: &str, powers_json: &str, type_: &str) -> String {
906    if let Err(e) = check_domain() {
907        return error_to_json(&e);
908    }
909
910    let y: Vec<f64> = match serde_json::from_str(y_json) {
911        Ok(v) => v,
912        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
913    };
914
915    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
916        Ok(v) => v,
917        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
918    };
919
920    let powers: Vec<usize> = match serde_json::from_str(powers_json) {
921        Ok(v) => v,
922        Err(e) => return error_json(&format!("Failed to parse powers: {}", e)),
923    };
924
925    // Parse reset type (default to "fitted")
926    let reset_type = match type_.to_lowercase().as_str() {
927        "regressor" => diagnostics::ResetType::Regressor,
928        "princomp" => diagnostics::ResetType::PrincipalComponent,
929        _ => diagnostics::ResetType::Fitted,
930    };
931
932    match diagnostics::reset_test(&y, &x_vars, &powers, reset_type) {
933        Ok(output) => serde_json::to_string(&output)
934            .unwrap_or_else(|_| error_json("Failed to serialize RESET test result")),
935        Err(e) => error_json(&e.to_string()),
936    }
937}
938
939/// Performs the Breusch-Godfrey test for higher-order serial correlation via WASM.
940///
941/// Unlike the Durbin-Watson test which only detects first-order autocorrelation,
942/// the Breusch-Godfrey test can detect serial correlation at any lag order.
943///
944/// # Arguments
945///
946/// * `y_json` - JSON array of response variable values
947/// * `x_vars_json` - JSON array of predictor arrays
948/// * `order` - Maximum order of serial correlation to test (default: 1)
949/// * `test_type` - Type of test statistic: "chisq" or "f" (default: "chisq")
950///
951/// # Returns
952///
953/// JSON string containing test statistic, p-value, degrees of freedom, and interpretation.
954///
955/// # Errors
956///
957/// Returns a JSON error object if parsing fails or domain check fails.
958#[cfg(feature = "wasm")]
959#[wasm_bindgen]
960pub fn breusch_godfrey_test(y_json: &str, x_vars_json: &str, order: usize, test_type: &str) -> String {
961    if let Err(e) = check_domain() {
962        return error_to_json(&e);
963    }
964
965    let y: Vec<f64> = match serde_json::from_str(y_json) {
966        Ok(v) => v,
967        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
968    };
969
970    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
971        Ok(v) => v,
972        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
973    };
974
975    // Parse test type (default to "chisq")
976    let bg_test_type = match test_type.to_lowercase().as_str() {
977        "f" => diagnostics::BGTestType::F,
978        _ => diagnostics::BGTestType::Chisq,
979    };
980
981    match diagnostics::breusch_godfrey_test(&y, &x_vars, order, bg_test_type) {
982        Ok(output) => serde_json::to_string(&output)
983            .unwrap_or_else(|_| error_json("Failed to serialize Breusch-Godfrey test result")),
984        Err(e) => error_json(&e.to_string()),
985    }
986}
987
988// ============================================================================
989// Regularized Regression WASM Wrappers
990// ============================================================================
991
992#[cfg(feature = "wasm")]
993#[wasm_bindgen]
994/// Performs Ridge regression via WASM.
995///
996/// Ridge regression adds an L2 penalty to the coefficients, which helps with
997/// multicollinearity and overfitting. The intercept is never penalized.
998///
999/// # Arguments
1000///
1001/// * `y_json` - JSON array of response variable values
1002/// * `x_vars_json` - JSON array of predictor arrays
1003/// * `variable_names` - JSON array of variable names
1004/// * `lambda` - Regularization strength (>= 0, typical range 0.01 to 100)
1005/// * `standardize` - Whether to standardize predictors (recommended: true)
1006///
1007/// # Returns
1008///
1009/// JSON string containing:
1010/// - `lambda` - Lambda value used
1011/// - `intercept` - Intercept coefficient
1012/// - `coefficients` - Slope coefficients
1013/// - `fitted_values` - Predictions on training data
1014/// - `residuals` - Residuals (y - fitted_values)
1015/// - `df` - Effective degrees of freedom
1016///
1017/// # Errors
1018///
1019/// Returns a JSON error object if parsing fails, lambda is negative,
1020/// or domain check fails.
1021pub fn ridge_regression(
1022    y_json: &str,
1023    x_vars_json: &str,
1024    _variable_names: &str,
1025    lambda: f64,
1026    standardize: bool,
1027) -> String {
1028    if let Err(e) = check_domain() {
1029        return error_to_json(&e);
1030    }
1031
1032    // Parse JSON input
1033    let y: Vec<f64> = match serde_json::from_str(y_json) {
1034        Ok(v) => v,
1035        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1036    };
1037
1038    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1039        Ok(v) => v,
1040        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1041    };
1042
1043    // Build design matrix with intercept column
1044    let n = y.len();
1045    let p = x_vars.len();
1046
1047    if n <= p + 1 {
1048        return error_json(&format!(
1049            "Insufficient data: need at least {} observations for {} predictors",
1050            p + 2,
1051            p
1052        ));
1053    }
1054
1055    let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1056    for (j, x_var) in x_vars.iter().enumerate() {
1057        if x_var.len() != n {
1058            return error_json(&format!(
1059                "x_vars[{}] has {} elements, expected {}",
1060                j,
1061                x_var.len(),
1062                n
1063            ));
1064        }
1065        for (i, &val) in x_var.iter().enumerate() {
1066            x_data[i * (p + 1) + j + 1] = val;
1067        }
1068    }
1069
1070    let x = linalg::Matrix::new(n, p + 1, x_data);
1071
1072    // Configure ridge options
1073    let options = regularized::ridge::RidgeFitOptions {
1074        lambda,
1075        intercept: true,
1076        standardize,
1077        max_iter: 100000,
1078        tol: 1e-7,
1079        warm_start: None,
1080        weights: None,
1081    };
1082
1083    match regularized::ridge::ridge_fit(&x, &y, &options) {
1084        Ok(output) => serde_json::to_string(&output)
1085            .unwrap_or_else(|_| error_json("Failed to serialize ridge regression result")),
1086        Err(e) => error_json(&e.to_string()),
1087    }
1088}
1089
1090#[cfg(feature = "wasm")]
1091#[wasm_bindgen]
1092/// Performs Lasso regression via WASM.
1093///
1094/// Lasso regression adds an L1 penalty to the coefficients, which performs
1095/// automatic variable selection by shrinking some coefficients to exactly zero.
1096/// The intercept is never penalized.
1097///
1098/// # Arguments
1099///
1100/// * `y_json` - JSON array of response variable values
1101/// * `x_vars_json` - JSON array of predictor arrays
1102/// * `variable_names` - JSON array of variable names
1103/// * `lambda` - Regularization strength (>= 0, typical range 0.01 to 10)
1104/// * `standardize` - Whether to standardize predictors (recommended: true)
1105/// * `max_iter` - Maximum coordinate descent iterations (default: 100000)
1106/// * `tol` - Convergence tolerance (default: 1e-7)
1107///
1108/// # Returns
1109///
1110/// JSON string containing:
1111/// - `lambda` - Lambda value used
1112/// - `intercept` - Intercept coefficient
1113/// - `coefficients` - Slope coefficients (some may be exactly zero)
1114/// - `fitted_values` - Predictions on training data
1115/// - `residuals` - Residuals (y - fitted_values)
1116/// - `n_nonzero` - Number of non-zero coefficients (excluding intercept)
1117/// - `iterations` - Number of coordinate descent iterations
1118/// - `converged` - Whether the algorithm converged
1119///
1120/// # Errors
1121///
1122/// Returns a JSON error object if parsing fails, lambda is negative,
1123/// or domain check fails.
1124pub fn lasso_regression(
1125    y_json: &str,
1126    x_vars_json: &str,
1127    _variable_names: &str,
1128    lambda: f64,
1129    standardize: bool,
1130    max_iter: usize,
1131    tol: f64,
1132) -> String {
1133    if let Err(e) = check_domain() {
1134        return error_to_json(&e);
1135    }
1136
1137    // Parse JSON input
1138    let y: Vec<f64> = match serde_json::from_str(y_json) {
1139        Ok(v) => v,
1140        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1141    };
1142
1143    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1144        Ok(v) => v,
1145        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1146    };
1147
1148    // Build design matrix with intercept column
1149    let n = y.len();
1150    let p = x_vars.len();
1151
1152    if n <= p + 1 {
1153        return error_json(&format!(
1154            "Insufficient data: need at least {} observations for {} predictors",
1155            p + 2,
1156            p
1157        ));
1158    }
1159
1160    let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1161    for (j, x_var) in x_vars.iter().enumerate() {
1162        if x_var.len() != n {
1163            return error_json(&format!(
1164                "x_vars[{}] has {} elements, expected {}",
1165                j,
1166                x_var.len(),
1167                n
1168            ));
1169        }
1170        for (i, &val) in x_var.iter().enumerate() {
1171            x_data[i * (p + 1) + j + 1] = val;
1172        }
1173    }
1174
1175    let x = linalg::Matrix::new(n, p + 1, x_data);
1176
1177    // Configure lasso options
1178    let options = regularized::lasso::LassoFitOptions {
1179        lambda,
1180        intercept: true,
1181        standardize,
1182        max_iter,
1183        tol,
1184        ..Default::default()
1185    };
1186
1187    match regularized::lasso::lasso_fit(&x, &y, &options) {
1188        Ok(output) => serde_json::to_string(&output)
1189            .unwrap_or_else(|_| error_json("Failed to serialize lasso regression result")),
1190        Err(e) => error_json(&e.to_string()),
1191    }
1192}
1193
1194#[cfg(feature = "wasm")]
1195#[wasm_bindgen]
1196#[allow(clippy::too_many_arguments)]
1197/// Performs Elastic Net regression via WASM.
1198///
1199/// Elastic Net combines L1 (Lasso) and L2 (Ridge) penalties.
1200///
1201/// # Arguments
1202///
1203/// * `y_json` - JSON array of response variable values
1204/// * `x_vars_json` - JSON array of predictor arrays
1205/// * `variable_names` - JSON array of variable names
1206/// * `lambda` - Regularization strength (>= 0)
1207/// * `alpha` - Elastic net mixing parameter (0 = Ridge, 1 = Lasso)
1208/// * `standardize` - Whether to standardize predictors (recommended: true)
1209/// * `max_iter` - Maximum coordinate descent iterations
1210/// * `tol` - Convergence tolerance
1211///
1212/// # Returns
1213///
1214/// JSON string containing regression results (same fields as Lasso).
1215///
1216/// # Errors
1217///
1218/// Returns a JSON error object if parsing fails, parameters are invalid,
1219/// or domain check fails.
1220pub fn elastic_net_regression(
1221    y_json: &str,
1222    x_vars_json: &str,
1223    _variable_names: &str,
1224    lambda: f64,
1225    alpha: f64,
1226    standardize: bool,
1227    max_iter: usize,
1228    tol: f64,
1229) -> String {
1230    if let Err(e) = check_domain() {
1231        return error_to_json(&e);
1232    }
1233
1234    // Parse JSON input
1235    let y: Vec<f64> = match serde_json::from_str(y_json) {
1236        Ok(v) => v,
1237        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1238    };
1239
1240    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1241        Ok(v) => v,
1242        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1243    };
1244
1245    // Build design matrix with intercept column
1246    let n = y.len();
1247    let p = x_vars.len();
1248
1249    if n <= p + 1 {
1250        return error_json(&format!(
1251            "Insufficient data: need at least {} observations for {} predictors",
1252            p + 2,
1253            p
1254        ));
1255    }
1256
1257    let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1258    for (j, x_var) in x_vars.iter().enumerate() {
1259        if x_var.len() != n {
1260            return error_json(&format!(
1261                "x_vars[{}] has {} elements, expected {}",
1262                j,
1263                x_var.len(),
1264                n
1265            ));
1266        }
1267        for (i, &val) in x_var.iter().enumerate() {
1268            x_data[i * (p + 1) + j + 1] = val;
1269        }
1270    }
1271
1272    let x = linalg::Matrix::new(n, p + 1, x_data);
1273
1274    // Configure elastic net options
1275    let options = regularized::elastic_net::ElasticNetOptions {
1276        lambda,
1277        alpha,
1278        intercept: true,
1279        standardize,
1280        max_iter,
1281        tol,
1282        ..Default::default()
1283    };
1284
1285    match regularized::elastic_net::elastic_net_fit(&x, &y, &options) {
1286        Ok(output) => serde_json::to_string(&output)
1287            .unwrap_or_else(|_| error_json("Failed to serialize elastic net regression result")),
1288        Err(e) => error_json(&e.to_string()),
1289    }
1290}
1291
1292#[cfg(feature = "wasm")]
1293#[wasm_bindgen]
1294/// Generates a lambda path for regularized regression via WASM.
1295///
1296/// Creates a logarithmically-spaced sequence of lambda values from lambda_max
1297/// (where all penalized coefficients are zero) down to lambda_min. This is
1298/// useful for fitting regularization paths and selecting optimal lambda via
1299/// cross-validation.
1300///
1301/// # Arguments
1302///
1303/// * `y_json` - JSON array of response variable values
1304/// * `x_vars_json` - JSON array of predictor arrays
1305/// * `n_lambda` - Number of lambda values to generate (default: 100)
1306/// * `lambda_min_ratio` - Ratio for smallest lambda (default: 0.0001 if n >= p, else 0.01)
1307///
1308/// # Returns
1309///
1310/// JSON string containing:
1311/// - `lambda_path` - Array of lambda values in decreasing order
1312/// - `lambda_max` - Maximum lambda value
1313/// - `lambda_min` - Minimum lambda value
1314/// - `n_lambda` - Number of lambda values
1315///
1316/// # Errors
1317///
1318/// Returns a JSON error object if parsing fails or domain check fails.
1319pub fn make_lambda_path(
1320    y_json: &str,
1321    x_vars_json: &str,
1322    n_lambda: usize,
1323    lambda_min_ratio: f64,
1324) -> String {
1325    if let Err(e) = check_domain() {
1326        return error_to_json(&e);
1327    }
1328
1329    // Parse JSON input
1330    let y: Vec<f64> = match serde_json::from_str(y_json) {
1331        Ok(v) => v,
1332        Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1333    };
1334
1335    let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1336        Ok(v) => v,
1337        Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1338    };
1339
1340    // Build design matrix with intercept column
1341    let n = y.len();
1342    let p = x_vars.len();
1343
1344    let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1345    for (j, x_var) in x_vars.iter().enumerate() {
1346        if x_var.len() != n {
1347            return error_json(&format!(
1348                "x_vars[{}] has {} elements, expected {}",
1349                j,
1350                x_var.len(),
1351                n
1352            ));
1353        }
1354        for (i, &val) in x_var.iter().enumerate() {
1355            x_data[i * (p + 1) + j + 1] = val;
1356        }
1357    }
1358
1359    let x = linalg::Matrix::new(n, p + 1, x_data);
1360
1361    // Standardize X for lambda path computation
1362    let x_mean: Vec<f64> = (0..x.cols)
1363        .map(|j| {
1364            if j == 0 {
1365                1.0 // Intercept column
1366            } else {
1367                (0..n).map(|i| x.get(i, j)).sum::<f64>() / n as f64
1368            }
1369        })
1370        .collect();
1371
1372    let x_standardized: Vec<f64> = (0..x.cols)
1373        .map(|j| {
1374            if j == 0 {
1375                0.0 // Intercept column - no centering
1376            } else {
1377                let mean = x_mean[j];
1378                let variance =
1379                    (0..n).map(|i| (x.get(i, j) - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
1380                variance.sqrt()
1381            }
1382        })
1383        .collect();
1384
1385    // Build standardized X matrix
1386    let mut x_standardized_data = vec![1.0; n * (p + 1)];
1387    for j in 0..x.cols {
1388        for i in 0..n {
1389            if j == 0 {
1390                x_standardized_data[i * (p + 1)] = 1.0; // Intercept
1391            } else {
1392                let std = x_standardized[j];
1393                if std > 1e-10 {
1394                    x_standardized_data[i * (p + 1) + j] = (x.get(i, j) - x_mean[j]) / std;
1395                } else {
1396                    x_standardized_data[i * (p + 1) + j] = 0.0;
1397                }
1398            }
1399        }
1400    }
1401    let x_standardized = linalg::Matrix::new(n, p + 1, x_standardized_data);
1402
1403    // Center y
1404    let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
1405    let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
1406
1407    // Configure lambda path options
1408    let options = regularized::path::LambdaPathOptions {
1409        nlambda: n_lambda.max(1),
1410        lambda_min_ratio: if lambda_min_ratio > 0.0 {
1411            Some(lambda_min_ratio)
1412        } else {
1413            None
1414        },
1415        alpha: 1.0, // Lasso
1416        ..Default::default()
1417    };
1418
1419    let lambda_path =
1420        regularized::path::make_lambda_path(&x_standardized, &y_centered, &options, None, Some(0));
1421
1422    let lambda_max = lambda_path.first().copied().unwrap_or(0.0);
1423    let lambda_min = lambda_path.last().copied().unwrap_or(0.0);
1424
1425    // Return as JSON (note: infinity serializes as null in JSON, handled in JS)
1426    let result = serde_json::json!({
1427        "lambda_path": lambda_path,
1428        "lambda_max": lambda_max,
1429        "lambda_min": lambda_min,
1430        "n_lambda": lambda_path.len()
1431    });
1432
1433    result.to_string()
1434}
1435
1436// ============================================================================
1437// Statistical Utility Functions (WASM wrappers)
1438// ============================================================================
1439
1440#[cfg(feature = "wasm")]
1441#[wasm_bindgen]
1442/// Computes the Student's t-distribution cumulative distribution function.
1443///
1444/// Returns P(T ≤ t) for a t-distribution with the given degrees of freedom.
1445///
1446/// # Arguments
1447///
1448/// * `t` - t-statistic value
1449/// * `df` - Degrees of freedom
1450///
1451/// # Returns
1452///
1453/// The CDF value, or `NaN` if domain check fails.
1454pub fn get_t_cdf(t: f64, df: f64) -> f64 {
1455    if check_domain().is_err() {
1456        return f64::NAN;
1457    }
1458
1459    student_t_cdf(t, df)
1460}
1461
1462#[cfg(feature = "wasm")]
1463#[wasm_bindgen]
1464/// Computes the critical t-value for a given significance level.
1465///
1466/// Returns the t-value such that the area under the t-distribution curve
1467/// to the right equals alpha/2 (two-tailed test).
1468///
1469/// # Arguments
1470///
1471/// * `alpha` - Significance level (typically 0.05 for 95% confidence)
1472/// * `df` - Degrees of freedom
1473///
1474/// # Returns
1475///
1476/// The critical t-value, or `NaN` if domain check fails.
1477pub fn get_t_critical(alpha: f64, df: f64) -> f64 {
1478    if check_domain().is_err() {
1479        return f64::NAN;
1480    }
1481
1482    core::t_critical_quantile(df, alpha)
1483}
1484
1485#[cfg(feature = "wasm")]
1486#[wasm_bindgen]
1487/// Computes the inverse of the standard normal CDF (probit function).
1488///
1489/// Returns the z-score such that P(Z ≤ z) = p for a standard normal distribution.
1490///
1491/// # Arguments
1492///
1493/// * `p` - Probability (0 < p < 1)
1494///
1495/// # Returns
1496///
1497/// The z-score, or `NaN` if domain check fails.
1498pub fn get_normal_inverse(p: f64) -> f64 {
1499    if check_domain().is_err() {
1500        return f64::NAN;
1501    }
1502
1503    normal_inverse_cdf(p)
1504}
1505
1506// ============================================================================
1507// Statistical Utilities (WASM-only)
1508// ============================================================================
1509
1510#[cfg(feature = "wasm")]
1511#[wasm_bindgen]
1512/// Computes the arithmetic mean of a JSON array of f64 values.
1513///
1514/// # Arguments
1515///
1516/// * `data_json` - JSON string representing an array of f64 values
1517///
1518/// # Returns
1519///
1520/// JSON string with the mean, or "null" if input is invalid/empty
1521pub fn stats_mean(data_json: String) -> String {
1522    if check_domain().is_err() {
1523        return "null".to_string();
1524    }
1525
1526    let data: Vec<f64> = match serde_json::from_str(&data_json) {
1527        Ok(d) => d,
1528        Err(_) => return "null".to_string(),
1529    };
1530
1531    serde_json::to_string(&stats::mean(&data)).unwrap_or("null".to_string())
1532}
1533
1534#[cfg(feature = "wasm")]
1535#[wasm_bindgen]
1536/// Computes the sample standard deviation of a JSON array of f64 values.
1537///
1538/// Uses the (n-1) denominator for unbiased estimation.
1539///
1540/// # Arguments
1541///
1542/// * `data_json` - JSON string representing an array of f64 values
1543///
1544/// # Returns
1545///
1546/// JSON string with the standard deviation, or "null" if input is invalid
1547pub fn stats_stddev(data_json: String) -> String {
1548    if check_domain().is_err() {
1549        return "null".to_string();
1550    }
1551
1552    let data: Vec<f64> = match serde_json::from_str(&data_json) {
1553        Ok(d) => d,
1554        Err(_) => return "null".to_string(),
1555    };
1556
1557    serde_json::to_string(&stats::stddev(&data)).unwrap_or("null".to_string())
1558}
1559
1560#[cfg(feature = "wasm")]
1561#[wasm_bindgen]
1562/// Computes the sample variance of a JSON array of f64 values.
1563///
1564/// Uses the (n-1) denominator for unbiased estimation.
1565///
1566/// # Arguments
1567///
1568/// * `data_json` - JSON string representing an array of f64 values
1569///
1570/// # Returns
1571///
1572/// JSON string with the variance, or "null" if input is invalid
1573pub fn stats_variance(data_json: String) -> String {
1574    if check_domain().is_err() {
1575        return "null".to_string();
1576    }
1577
1578    let data: Vec<f64> = match serde_json::from_str(&data_json) {
1579        Ok(d) => d,
1580        Err(_) => return "null".to_string(),
1581    };
1582
1583    serde_json::to_string(&stats::variance(&data)).unwrap_or("null".to_string())
1584}
1585
1586#[cfg(feature = "wasm")]
1587#[wasm_bindgen]
1588/// Computes the median of a JSON array of f64 values.
1589///
1590/// # Arguments
1591///
1592/// * `data_json` - JSON string representing an array of f64 values
1593///
1594/// # Returns
1595///
1596/// JSON string with the median, or "null" if input is invalid/empty
1597pub fn stats_median(data_json: String) -> String {
1598    if check_domain().is_err() {
1599        return "null".to_string();
1600    }
1601
1602    let data: Vec<f64> = match serde_json::from_str(&data_json) {
1603        Ok(d) => d,
1604        Err(_) => return "null".to_string(),
1605    };
1606
1607    serde_json::to_string(&stats::median(&data)).unwrap_or("null".to_string())
1608}
1609
1610#[cfg(feature = "wasm")]
1611#[wasm_bindgen]
1612/// Computes a quantile of a JSON array of f64 values.
1613///
1614/// # Arguments
1615///
1616/// * `data_json` - JSON string representing an array of f64 values
1617/// * `q` - Quantile to calculate (0.0 to 1.0)
1618///
1619/// # Returns
1620///
1621/// JSON string with the quantile value, or "null" if input is invalid
1622pub fn stats_quantile(data_json: String, q: f64) -> String {
1623    if check_domain().is_err() {
1624        return "null".to_string();
1625    }
1626
1627    let data: Vec<f64> = match serde_json::from_str(&data_json) {
1628        Ok(d) => d,
1629        Err(_) => return "null".to_string(),
1630    };
1631
1632    serde_json::to_string(&stats::quantile(&data, q)).unwrap_or("null".to_string())
1633}
1634
1635#[cfg(feature = "wasm")]
1636#[wasm_bindgen]
1637/// Computes the correlation coefficient between two JSON arrays of f64 values.
1638///
1639/// # Arguments
1640///
1641/// * `x_json` - JSON string representing the first array of f64 values
1642/// * `y_json` - JSON string representing the second array of f64 values
1643///
1644/// # Returns
1645///
1646/// JSON string with the correlation coefficient, or "null" if input is invalid
1647pub fn stats_correlation(x_json: String, y_json: String) -> String {
1648    if check_domain().is_err() {
1649        return "null".to_string();
1650    }
1651
1652    let x: Vec<f64> = match serde_json::from_str(&x_json) {
1653        Ok(d) => d,
1654        Err(_) => return "null".to_string(),
1655    };
1656
1657    let y: Vec<f64> = match serde_json::from_str(&y_json) {
1658        Ok(d) => d,
1659        Err(_) => return "null".to_string(),
1660    };
1661
1662    serde_json::to_string(&stats::correlation(&x, &y)).unwrap_or("null".to_string())
1663}
1664
1665// ============================================================================
1666// Domain Check (WASM-only)
1667// ============================================================================
1668//
1669// By default, all domains are allowed. To enable domain restriction, set the
1670// LINREG_DOMAIN_RESTRICT environment variable at build time:
1671//
1672//   LINREG_DOMAIN_RESTRICT=example.com,yoursite.com wasm-pack build
1673//
1674// Example for jesse-anderson.net:
1675//   LINREG_DOMAIN_RESTRICT=jesse-anderson.net,tools.jesse-anderson.net,localhost,127.0.0.1 wasm-pack build
1676//
1677// This allows downstream users to use the library without modification while
1678// still providing domain restriction as an opt-in security feature.
1679
1680#[cfg(feature = "wasm")]
1681fn check_domain() -> Result<()> {
1682    // Read allowed domains from build-time environment variable
1683    let allowed_domains = option_env!("LINREG_DOMAIN_RESTRICT");
1684
1685    match allowed_domains {
1686        Some(domains) if !domains.is_empty() => {
1687            // Domain restriction is enabled
1688            let window =
1689                web_sys::window().ok_or(Error::DomainCheck("No window found".to_string()))?;
1690            let location = window.location();
1691            let hostname = location
1692                .hostname()
1693                .map_err(|_| Error::DomainCheck("No hostname found".to_string()))?;
1694
1695            let domain_list: Vec<&str> = domains.split(',').map(|s| s.trim()).collect();
1696
1697            if domain_list.contains(&hostname.as_str()) {
1698                Ok(())
1699            } else {
1700                Err(Error::DomainCheck(format!(
1701                    "Unauthorized domain: {}. Allowed: {}",
1702                    hostname, domains
1703                )))
1704            }
1705        },
1706        _ => {
1707            // No restriction - allow all domains
1708            Ok(())
1709        },
1710    }
1711}
1712
1713// ============================================================================
1714// Test Functions (WASM-only)
1715// ============================================================================
1716
1717#[cfg(test)]
1718mod tests {
1719    use super::*;
1720
1721    #[test]
1722    fn verify_housing_regression_integrity() {
1723        let result = test_housing_regression_native();
1724        if let Err(e) = result {
1725            panic!("Regression test failed: {}", e);
1726        }
1727    }
1728
1729    /// Test that test_housing_regression_native produces valid JSON
1730    #[test]
1731    fn test_housing_regression_json_output() {
1732        let result = test_housing_regression_native().unwrap();
1733        // Should be valid JSON
1734        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
1735        // Should have status field
1736        assert!(parsed.get("status").is_some());
1737        // Status should be PASS (we control the test data)
1738        assert_eq!(parsed["status"], "PASS");
1739    }
1740
1741    /// Test housing regression with actual R reference values
1742    #[test]
1743    fn test_housing_regression_coefficients() {
1744        let y = vec![
1745            245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9,
1746            367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7,
1747            267.9,
1748        ];
1749
1750        let square_feet = vec![
1751            1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
1752            2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
1753            1250.0, 1700.0, 850.0, 2350.0, 1400.0,
1754        ];
1755        let bedrooms = vec![
1756            3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0,
1757            3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
1758        ];
1759        let age = vec![
1760            15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0,
1761            22.0, 1.0, 16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
1762        ];
1763
1764        let x_vars = vec![square_feet, bedrooms, age];
1765        let names = vec![
1766            "Intercept".to_string(),
1767            "Square_Feet".to_string(),
1768            "Bedrooms".to_string(),
1769            "Age".to_string(),
1770        ];
1771
1772        let result = core::ols_regression(&y, &x_vars, &names).unwrap();
1773
1774        // Check against R results
1775        let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
1776        let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];
1777
1778        let tolerance = 1e-4;
1779        for i in 0..4 {
1780            assert!(
1781                (result.coefficients[i] - expected_coeffs[i]).abs() < tolerance,
1782                "coeff[{}] differs: got {}, expected {}",
1783                i,
1784                result.coefficients[i],
1785                expected_coeffs[i]
1786            );
1787            assert!(
1788                (result.std_errors[i] - expected_std_errs[i]).abs() < tolerance,
1789                "std_err[{}] differs: got {}, expected {}",
1790                i,
1791                result.std_errors[i],
1792                expected_std_errs[i]
1793            );
1794        }
1795    }
1796
1797    /// Test R-squared calculation in housing regression
1798    #[test]
1799    fn test_housing_regression_r_squared() {
1800        let result = test_housing_regression_native().unwrap();
1801        let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
1802
1803        // If status is PASS, R² should be reasonable (between 0 and 1)
1804        assert_eq!(parsed["status"], "PASS");
1805    }
1806
1807    /// Test that housing regression handles all expected output fields
1808    #[test]
1809    fn test_housing_regression_comprehensive() {
1810        let y = vec![
1811            245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1,
1812        ];
1813        let x1 = vec![1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0];
1814        let x2 = vec![3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0];
1815
1816        let result = core::ols_regression(&y, &[x1, x2], &["Intercept".into(), "X1".into(), "X2".into()])
1817            .unwrap();
1818
1819        // Verify expected output fields exist
1820        assert!(!result.coefficients.is_empty());
1821        assert!(!result.std_errors.is_empty());
1822        assert!(!result.t_stats.is_empty());
1823        assert!(!result.p_values.is_empty());
1824        assert!(result.r_squared >= 0.0 && result.r_squared <= 1.0);
1825        assert!(result.residuals.len() == y.len());
1826    }
1827
1828    /// Test error handling when insufficient data is provided
1829    #[test]
1830    fn test_housing_regression_insufficient_data() {
1831        let y = vec![245.5, 312.8]; // Only 2 observations
1832        let x1 = vec![1200.0, 1800.0];
1833        let x2 = vec![3.0, 4.0];
1834
1835        let result = core::ols_regression(&y, &[x1, x2], &["Intercept".into(), "X1".into(), "X2".into()]);
1836        assert!(result.is_err());
1837    }
1838
1839    /// Test housing regression precision with tolerance check
1840    #[test]
1841    fn test_housing_regression_tolerance_check() {
1842        let y = vec![
1843            245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9,
1844            367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7,
1845            267.9,
1846        ];
1847
1848        let square_feet = vec![
1849            1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
1850            2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
1851            1250.0, 1700.0, 850.0, 2350.0, 1400.0,
1852        ];
1853        let bedrooms = vec![
1854            3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0,
1855            3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
1856        ];
1857        let age = vec![
1858            15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0,
1859            22.0, 1.0, 16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
1860        ];
1861
1862        let x_vars = vec![square_feet, bedrooms, age];
1863        let names = vec![
1864            "Intercept".to_string(),
1865            "Square_Feet".to_string(),
1866            "Bedrooms".to_string(),
1867            "Age".to_string(),
1868        ];
1869
1870        let result = core::ols_regression(&y, &x_vars, &names).unwrap();
1871
1872        // Verify all coefficient values are finite
1873        for coef in &result.coefficients {
1874            assert!(coef.is_finite(), "Coefficient should be finite");
1875        }
1876        // Verify all standard errors are positive and finite
1877        for se in &result.std_errors {
1878            assert!(se.is_finite(), "Standard error should be finite");
1879            if *se <= 0.0 {
1880                panic!("Standard error should be positive, got {}", se);
1881            }
1882        }
1883    }
1884}
1885
1886#[cfg(feature = "wasm")]
1887#[wasm_bindgen]
1888/// Simple test function to verify WASM is working.
1889///
1890/// Returns a success message confirming the WASM module loaded correctly.
1891///
1892/// # Errors
1893///
1894/// Returns a JSON error object if domain check fails.
1895pub fn test() -> String {
1896    if let Err(e) = check_domain() {
1897        return error_to_json(&e);
1898    }
1899    "Rust WASM is working!".to_string()
1900}
1901
1902#[cfg(feature = "wasm")]
1903#[wasm_bindgen]
1904/// Returns the current version of the library.
1905///
1906/// Returns the Cargo package version as a string (e.g., "0.1.0").
1907///
1908/// # Errors
1909///
1910/// Returns a JSON error object if domain check fails.
1911pub fn get_version() -> String {
1912    if let Err(e) = check_domain() {
1913        return error_to_json(&e);
1914    }
1915    env!("CARGO_PKG_VERSION").to_string()
1916}
1917
1918#[cfg(feature = "wasm")]
1919#[wasm_bindgen]
1920/// Test function for t-critical value computation.
1921///
1922/// Returns JSON with the computed t-critical value for the given parameters.
1923///
1924/// # Errors
1925///
1926/// Returns a JSON error object if domain check fails.
1927pub fn test_t_critical(df: f64, alpha: f64) -> String {
1928    if let Err(e) = check_domain() {
1929        return error_to_json(&e);
1930    }
1931    let t_crit = core::t_critical_quantile(df, alpha);
1932    format!(
1933        r#"{{"df": {}, "alpha": {}, "t_critical": {}}}"#,
1934        df, alpha, t_crit
1935    )
1936}
1937
1938#[cfg(feature = "wasm")]
1939#[wasm_bindgen]
1940/// Test function for confidence interval computation.
1941///
1942/// Returns JSON with the computed confidence interval for a coefficient.
1943///
1944/// # Errors
1945///
1946/// Returns a JSON error object if domain check fails.
1947pub fn test_ci(coef: f64, se: f64, df: f64, alpha: f64) -> String {
1948    if let Err(e) = check_domain() {
1949        return error_to_json(&e);
1950    }
1951    let t_crit = core::t_critical_quantile(df, alpha);
1952    format!(
1953        r#"{{"lower": {}, "upper": {}}}"#,
1954        coef - t_crit * se,
1955        coef + t_crit * se
1956    )
1957}
1958
1959#[cfg(feature = "wasm")]
1960#[wasm_bindgen]
1961/// Test function for R accuracy validation.
1962///
1963/// Returns JSON comparing our statistical functions against R reference values.
1964///
1965/// # Errors
1966///
1967/// Returns a JSON error object if domain check fails.
1968pub fn test_r_accuracy() -> String {
1969    if let Err(e) = check_domain() {
1970        return error_to_json(&e);
1971    }
1972    format!(
1973        r#"{{"two_tail_p": {}, "qt_975": {}}}"#,
1974        core::two_tailed_p_value(1.6717, 21.0),
1975        core::t_critical_quantile(21.0, 0.05)
1976    )
1977}
1978
1979#[cfg(feature = "wasm")]
1980#[wasm_bindgen]
1981/// Test function for regression validation against R reference values.
1982///
1983/// Runs a regression on a housing dataset and compares results against R's lm() output.
1984/// Returns JSON with status "PASS" or "FAIL" with details.
1985///
1986/// # Errors
1987///
1988/// Returns a JSON error object if domain check fails.
1989pub fn test_housing_regression() -> String {
1990    if let Err(e) = check_domain() {
1991        return error_to_json(&e);
1992    }
1993
1994    match test_housing_regression_native() {
1995        Ok(result) => result,
1996        Err(e) => serde_json::json!({ "status": "ERROR", "error": e.to_string() }).to_string(),
1997    }
1998}
1999
2000// Native Rust test function (works without WASM feature)
2001#[cfg(any(test, feature = "wasm"))]
2002fn test_housing_regression_native() -> Result<String> {
2003    let y = vec![
2004        245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9, 367.4,
2005        289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7, 267.9,
2006    ];
2007
2008    let square_feet = vec![
2009        1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0, 2200.0,
2010        900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0, 1250.0, 1700.0,
2011        850.0, 2350.0, 1400.0,
2012    ];
2013    let bedrooms = vec![
2014        3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0, 3.0, 4.0,
2015        2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
2016    ];
2017    let age = vec![
2018        15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0, 22.0, 1.0,
2019        16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
2020    ];
2021
2022    let x_vars = vec![square_feet, bedrooms, age];
2023    let names = vec![
2024        "Intercept".to_string(),
2025        "Square_Feet".to_string(),
2026        "Bedrooms".to_string(),
2027        "Age".to_string(),
2028    ];
2029
2030    let result = core::ols_regression(&y, &x_vars, &names)?;
2031
2032    // Check against R results
2033    let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
2034    let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];
2035
2036    let tolerance = 1e-4;
2037    let mut mismatches = vec![];
2038
2039    for i in 0..4 {
2040        if (result.coefficients[i] - expected_coeffs[i]).abs() > tolerance {
2041            mismatches.push(format!(
2042                "coeff[{}] differs: got {}, expected {}",
2043                i, result.coefficients[i], expected_coeffs[i]
2044            ));
2045        }
2046        if (result.std_errors[i] - expected_std_errs[i]).abs() > tolerance {
2047            mismatches.push(format!(
2048                "std_err[{}] differs: got {}, expected {}",
2049                i, result.std_errors[i], expected_std_errs[i]
2050            ));
2051        }
2052    }
2053
2054    if mismatches.is_empty() {
2055        Ok(serde_json::json!({ "status": "PASS" }).to_string())
2056    } else {
2057        Ok(serde_json::json!({ "status": "FAIL", "mismatches": mismatches }).to_string())
2058    }
2059}