linreg_core/lib.rs
1//! # linreg-core
2//!
3//! A lightweight, self-contained linear regression library in pure Rust.
4//!
5//! **No external math dependencies.** All linear algebra (matrices, QR decomposition)
6//! and statistical functions (distributions, hypothesis tests) are implemented from
7//! scratch. Compiles to WebAssembly for browser use or runs as a native Rust crate.
8//!
9//! ## What This Does
10//!
11//! - **OLS Regression** — Ordinary Least Squares with numerically stable QR decomposition
12//! - **Regularized Regression** — Ridge, Lasso, and Elastic Net via coordinate descent
13//! - **Diagnostic Tests** — 8+ statistical tests for validating regression assumptions
14//! - **WASM Support** — Same API works in browsers via WebAssembly
15//!
16//! ## Quick Start
17//!
18//! ### Native Rust
19//!
20//! Add to `Cargo.toml` (no WASM overhead):
21//!
22//! ```toml
23//! [dependencies]
24//! linreg-core = { version = "0.3", default-features = false }
25//! ```
26//!
27//! ```rust
28//! use linreg_core::core::ols_regression;
29//!
30//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
31//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
32//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
33//! let names = vec!["Intercept".into(), "Temp".into(), "Pressure".into()];
34//!
35//! let result = ols_regression(&y, &[x1, x2], &names)?;
36//! println!("R²: {}", result.r_squared);
37//! println!("F-statistic: {}", result.f_statistic);
38//! # Ok::<(), linreg_core::Error>(())
39//! ```
40//!
41//! ### WebAssembly (JavaScript)
42//!
43//! ```toml
44//! [dependencies]
45//! linreg-core = "0.3"
46//! ```
47//!
48//! Build with `wasm-pack build --target web`, then use in JavaScript:
49//!
50//! ```text
51//! import init, { ols_regression } from './linreg_core.js';
52//! await init();
53//!
54//! const result = JSON.parse(ols_regression(
55//! JSON.stringify([2.5, 3.7, 4.2, 5.1, 6.3]),
56//! JSON.stringify([[1,2,3,4,5], [2,4,5,4,3]]),
57//! JSON.stringify(["Intercept", "X1", "X2"])
58//! ));
59//! console.log("R²:", result.r_squared);
60//! ```
61//!
62//! ## Regularized Regression
63//!
64//! ```no_run
65//! use linreg_core::regularized::{ridge, lasso};
66//! use linreg_core::linalg::Matrix;
67//!
68//! let x = Matrix::new(100, 3, vec![0.0; 300]);
69//! let y = vec![0.0; 100];
70//!
71//! // Ridge regression (L2 penalty - shrinks coefficients, handles multicollinearity)
72//! let ridge_result = ridge::ridge_fit(&x, &y, &ridge::RidgeFitOptions {
73//! lambda: 1.0,
74//! intercept: true,
75//! standardize: true,
76//! ..Default::default()
77//! })?;
78//!
79//! // Lasso regression (L1 penalty - does variable selection by zeroing coefficients)
80//! let lasso_result = lasso::lasso_fit(&x, &y, &lasso::LassoFitOptions {
81//! lambda: 0.1,
82//! intercept: true,
83//! standardize: true,
84//! ..Default::default()
85//! })?;
86//! # Ok::<(), linreg_core::Error>(())
87//! ```
88//!
89//! ## Diagnostic Tests
90//!
91//! After fitting a model, validate its assumptions:
92//!
93//! | Test | Tests For | Use When |
94//! |------|-----------|----------|
95//! | [`rainbow_test`] | Linearity | Checking if relationships are linear |
96//! | [`harvey_collier_test`] | Functional form | Suspecting model misspecification |
97//! | [`breusch_pagan_test`] | Heteroscedasticity | Variance changes with predictors |
98//! | [`white_test`] | Heteroscedasticity | More general than Breusch-Pagan |
99//! | [`shapiro_wilk_test`] | Normality | Small to moderate samples (n ≤ 5000) |
100//! | [`jarque_bera_test`] | Normality | Large samples, skewness/kurtosis |
101//! | [`anderson_darling_test`] | Normality | Tail-sensitive, any sample size |
102//! | [`durbin_watson_test`] | Autocorrelation | Time series or ordered data |
103//! | [`cooks_distance_test`] | Influential points | Identifying high-impact observations |
104//!
105//! ```rust
106//! use linreg_core::diagnostics::{rainbow_test, breusch_pagan_test, RainbowMethod};
107//!
108//! # let y = vec![2.5, 3.7, 4.2, 5.1, 6.3];
109//! # let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
110//! # let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0];
111//! // Rainbow test for linearity
112//! let rainbow = rainbow_test(&y, &[x1.clone(), x2.clone()], 0.5, RainbowMethod::R)?;
113//! if rainbow.r_result.as_ref().map_or(false, |r| r.p_value < 0.05) {
114//! println!("Warning: relationship may be non-linear");
115//! }
116//!
117//! // Breusch-Pagan test for heteroscedasticity
118//! let bp = breusch_pagan_test(&y, &[x1, x2])?;
119//! if bp.p_value < 0.05 {
120//! println!("Warning: residuals have non-constant variance");
121//! }
122//! # Ok::<(), linreg_core::Error>(())
123//! ```
124//!
125//! ## Feature Flags
126//!
127//! | Flag | Default | Description |
128//! |------|---------|-------------|
129//! | `wasm` | Yes | Enables WASM bindings and browser support |
130//! | `validation` | No | Includes test data for validation tests |
131//!
132//! For native-only builds (smaller binary, no WASM deps):
133//!
134//! ```toml
135//! linreg-core = { version = "0.3", default-features = false }
136//! ```
137//!
138//! ## Why This Library?
139//!
140//! - **Zero dependencies** — No `nalgebra`, no `statrs`, no `ndarray`. Pure Rust.
141//! - **Validated** — Outputs match R's `lm()` and Python's `statsmodels`
142//! - **WASM-ready** — Same code runs natively and in browsers
143//! - **Small** — Core is ~2000 lines, compiles quickly
144//! - **Permissive license** — MIT OR Apache-2.0
145//!
146//! ## Module Structure
147//!
148//! - [`core`] — OLS regression, coefficients, residuals, VIF
149//! - [`regularized`] — Ridge, Lasso, Elastic Net, regularization paths
150//! - [`diagnostics`] — All diagnostic tests (linearity, heteroscedasticity, normality, autocorrelation)
151//! - [`distributions`] — Statistical distributions (t, F, χ², normal, beta, gamma)
152//! - [`linalg`] — Matrix operations, QR decomposition, linear system solver
153//! - [`error`] — Error types and Result alias
154//!
155//! ## Links
156//!
157//! - [Repository](https://github.com/jesse-anderson/linreg-core)
158//! - [Documentation](https://docs.rs/linreg-core)
159//! - [Examples](https://github.com/jesse-anderson/linreg-core/tree/main/examples)
160//!
161//! ## Disclaimer
162//!
163//! This library is under active development and has not reached 1.0 stability.
164//! While outputs are validated against R and Python implementations, **do not
165//! use this library for critical applications** (medical, financial, safety-critical
166//! systems) without independent verification. See the LICENSE for full terms.
167//! The software is provided "as is" without warranty of any kind.
168
169// Import core modules (always available)
170pub mod core;
171pub mod diagnostics;
172pub mod distributions;
173pub mod error;
174pub mod linalg;
175pub mod regularized;
176pub mod stats;
177
178// Unit tests are now in tests/unit/ directory
179// - error_tests.rs -> tests/unit/error_tests.rs
180// - core_tests.rs -> tests/unit/core_tests.rs
181// - linalg_tests.rs -> tests/unit/linalg_tests.rs
182// - validation_tests.rs -> tests/validation/main.rs
183// - diagnostics_tests.rs: disabled (references unimplemented functions)
184
185// Re-export public API (always available)
186pub use core::{RegressionOutput, VifResult};
187pub use diagnostics::{
188 BGTestType, BreuschGodfreyResult, CooksDistanceResult, DiagnosticTestResult,
189 RainbowMethod, RainbowSingleResult, RainbowTestOutput, ResetType,
190 WhiteMethod, WhiteSingleResult, WhiteTestOutput,
191};
192
193// Re-export core test functions with different names to avoid WASM conflicts
194pub use diagnostics::rainbow_test as rainbow_test_core;
195pub use diagnostics::white_test as white_test_core;
196
197pub use error::{error_json, error_to_json, Error, Result};
198
199// ============================================================================
200// WASM-specific code (only compiled when "wasm" feature is enabled)
201// ============================================================================
202
203#[cfg(feature = "wasm")]
204use wasm_bindgen::prelude::*;
205
206#[cfg(feature = "wasm")]
207use std::collections::HashSet;
208
209#[cfg(feature = "wasm")]
210use serde::Serialize;
211
212#[cfg(feature = "wasm")]
213use crate::distributions::{normal_inverse_cdf, student_t_cdf};
214
215// ============================================================================
216// CSV Parsing (WASM-only)
217// ============================================================================
218
219#[cfg(feature = "wasm")]
220#[derive(Serialize)]
221struct ParsedCsv {
222 headers: Vec<String>,
223 data: Vec<serde_json::Map<String, serde_json::Value>>,
224 numeric_columns: Vec<String>,
225}
226
227#[cfg(feature = "wasm")]
228#[wasm_bindgen]
229/// Parses CSV data and returns it as a JSON string.
230///
231/// Parses the CSV content and identifies numeric columns. Returns a JSON object
232/// with headers, data rows, and a list of numeric column names.
233///
234/// # Arguments
235///
236/// * `content` - CSV content as a string
237///
238/// # Returns
239///
240/// JSON string with structure:
241/// ```json
242/// {
243/// "headers": ["col1", "col2", ...],
244/// "data": [{"col1": 1.0, "col2": "text"}, ...],
245/// "numeric_columns": ["col1", ...]
246/// }
247/// ```
248///
249/// # Errors
250///
251/// Returns a JSON error object if parsing fails or domain check fails.
252pub fn parse_csv(content: &str) -> String {
253 if let Err(e) = check_domain() {
254 return error_to_json(&e);
255 }
256
257 let mut reader = csv::ReaderBuilder::new()
258 .has_headers(true)
259 .flexible(true)
260 .from_reader(content.as_bytes());
261
262 // Get headers
263 let headers: Vec<String> = match reader.headers() {
264 Ok(h) => h.iter().map(|s| s.to_string()).collect(),
265 Err(e) => return error_json(&format!("Failed to read headers: {}", e)),
266 };
267
268 let mut data = Vec::new();
269 let mut numeric_col_set = HashSet::new();
270
271 for result in reader.records() {
272 let record = match result {
273 Ok(r) => r,
274 Err(e) => return error_json(&format!("Failed to parse CSV record: {}", e)),
275 };
276
277 if record.len() != headers.len() {
278 continue;
279 }
280
281 let mut row_map = serde_json::Map::new();
282
283 for (i, field) in record.iter().enumerate() {
284 if i >= headers.len() {
285 continue;
286 }
287
288 let header = &headers[i];
289 let val_trimmed = field.trim();
290
291 // Try to parse as f64
292 if let Ok(num) = val_trimmed.parse::<f64>() {
293 if num.is_finite() {
294 row_map.insert(
295 header.clone(),
296 serde_json::Value::Number(serde_json::Number::from_f64(num).unwrap()),
297 );
298 numeric_col_set.insert(header.clone());
299 continue;
300 }
301 }
302
303 // Fallback to string
304 row_map.insert(
305 header.clone(),
306 serde_json::Value::String(val_trimmed.to_string()),
307 );
308 }
309 data.push(row_map);
310 }
311
312 let mut numeric_columns: Vec<String> = numeric_col_set.into_iter().collect();
313 numeric_columns.sort();
314
315 let output = ParsedCsv {
316 headers,
317 data,
318 numeric_columns,
319 };
320
321 serde_json::to_string(&output).unwrap_or_else(|_| error_json("Failed to serialize CSV output"))
322}
323
324// ============================================================================
325// OLS Regression WASM Wrapper
326// ============================================================================
327
328#[cfg(feature = "wasm")]
329#[wasm_bindgen]
330/// Performs OLS regression via WASM.
331///
332/// All parameters and return values are JSON-encoded strings for JavaScript
333/// interoperability. Returns regression output including coefficients,
334/// standard errors, diagnostic statistics, and VIF analysis.
335///
336/// # Arguments
337///
338/// * `y_json` - JSON array of response variable values: `[1.0, 2.0, 3.0]`
339/// * `x_vars_json` - JSON array of predictor arrays: `[[1.0, 2.0], [0.5, 1.0]]`
340/// * `variable_names` - JSON array of variable names: `["Intercept", "X1", "X2"]`
341///
342/// # Returns
343///
344/// JSON string containing the complete regression output with coefficients,
345/// standard errors, t-statistics, p-values, R², F-statistic, residuals, leverage, VIF, etc.
346///
347/// # Errors
348///
349/// Returns a JSON error object if:
350/// - JSON parsing fails
351/// - Insufficient data (n ≤ k + 1)
352/// - Matrix is singular
353/// - Domain check fails
354pub fn ols_regression(y_json: &str, x_vars_json: &str, variable_names: &str) -> String {
355 if let Err(e) = check_domain() {
356 return error_to_json(&e);
357 }
358
359 // Parse JSON input
360 let y: Vec<f64> = match serde_json::from_str(y_json) {
361 Ok(v) => v,
362 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
363 };
364
365 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
366 Ok(v) => v,
367 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
368 };
369
370 let names: Vec<String> = match serde_json::from_str(variable_names) {
371 Ok(v) => v,
372 Err(_) => vec!["Intercept".to_string()],
373 };
374
375 // Call core function
376 match core::ols_regression(&y, &x_vars, &names) {
377 Ok(output) => serde_json::to_string(&output)
378 .unwrap_or_else(|_| error_json("Failed to serialize output")),
379 Err(e) => error_json(&e.to_string()),
380 }
381}
382
383// ============================================================================
384// Diagnostic Tests WASM Wrappers
385// ============================================================================
386
387/// Performs the Rainbow test for linearity via WASM.
388///
389/// The Rainbow test checks whether the relationship between predictors and response
390/// is linear. A significant p-value suggests non-linearity.
391///
392/// # Arguments
393///
394/// * `y_json` - JSON array of response variable values
395/// * `x_vars_json` - JSON array of predictor arrays
396/// * `fraction` - Fraction of data to use in the central subset (0.0 to 1.0, typically 0.5)
397/// * `method` - Method to use: "r", "python", or "both" (case-insensitive, defaults to "r")
398///
399/// # Returns
400///
401/// JSON string containing test statistic, p-value, and interpretation.
402///
403/// # Errors
404///
405/// Returns a JSON error object if parsing fails or domain check fails.
406#[cfg(feature = "wasm")]
407#[wasm_bindgen]
408pub fn rainbow_test(y_json: &str, x_vars_json: &str, fraction: f64, method: &str) -> String {
409 if let Err(e) = check_domain() {
410 return error_to_json(&e);
411 }
412
413 let y: Vec<f64> = match serde_json::from_str(y_json) {
414 Ok(v) => v,
415 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
416 };
417
418 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
419 Ok(v) => v,
420 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
421 };
422
423 // Parse method parameter (default to "r" for R)
424 let method = match method.to_lowercase().as_str() {
425 "python" => diagnostics::RainbowMethod::Python,
426 "both" => diagnostics::RainbowMethod::Both,
427 _ => diagnostics::RainbowMethod::R, // Default to R
428 };
429
430 match diagnostics::rainbow_test(&y, &x_vars, fraction, method) {
431 Ok(output) => serde_json::to_string(&output)
432 .unwrap_or_else(|_| error_json("Failed to serialize Rainbow test result")),
433 Err(e) => error_json(&e.to_string()),
434 }
435}
436
437/// Performs the Harvey-Collier test for linearity via WASM.
438///
439/// The Harvey-Collier test checks whether the residuals exhibit a linear trend,
440/// which would indicate that the model's functional form is misspecified.
441/// A significant p-value suggests non-linearity.
442///
443/// # Arguments
444///
445/// * `y_json` - JSON array of response variable values
446/// * `x_vars_json` - JSON array of predictor arrays
447///
448/// # Returns
449///
450/// JSON string containing test statistic, p-value, and interpretation.
451///
452/// # Errors
453///
454/// Returns a JSON error object if parsing fails or domain check fails.
455#[cfg(feature = "wasm")]
456#[wasm_bindgen]
457pub fn harvey_collier_test(y_json: &str, x_vars_json: &str) -> String {
458 if let Err(e) = check_domain() {
459 return error_to_json(&e);
460 }
461
462 let y: Vec<f64> = match serde_json::from_str(y_json) {
463 Ok(v) => v,
464 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
465 };
466
467 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
468 Ok(v) => v,
469 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
470 };
471
472 match diagnostics::harvey_collier_test(&y, &x_vars) {
473 Ok(output) => serde_json::to_string(&output)
474 .unwrap_or_else(|_| error_json("Failed to serialize Harvey-Collier test result")),
475 Err(e) => error_json(&e.to_string()),
476 }
477}
478
479/// Performs the Breusch-Pagan test for heteroscedasticity via WASM.
480///
481/// The Breusch-Pagan test checks whether the variance of residuals is constant
482/// across the range of predicted values (homoscedasticity assumption).
483/// A significant p-value suggests heteroscedasticity.
484///
485/// # Arguments
486///
487/// * `y_json` - JSON array of response variable values
488/// * `x_vars_json` - JSON array of predictor arrays
489///
490/// # Returns
491///
492/// JSON string containing test statistic, p-value, and interpretation.
493///
494/// # Errors
495///
496/// Returns a JSON error object if parsing fails or domain check fails.
497#[cfg(feature = "wasm")]
498#[wasm_bindgen]
499pub fn breusch_pagan_test(y_json: &str, x_vars_json: &str) -> String {
500 if let Err(e) = check_domain() {
501 return error_to_json(&e);
502 }
503
504 let y: Vec<f64> = match serde_json::from_str(y_json) {
505 Ok(v) => v,
506 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
507 };
508
509 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
510 Ok(v) => v,
511 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
512 };
513
514 match diagnostics::breusch_pagan_test(&y, &x_vars) {
515 Ok(output) => serde_json::to_string(&output)
516 .unwrap_or_else(|_| error_json("Failed to serialize Breusch-Pagan test result")),
517 Err(e) => error_json(&e.to_string()),
518 }
519}
520
521/// Performs the White test for heteroscedasticity via WASM.
522///
523/// The White test is a more general test for heteroscedasticity that does not
524/// assume a specific form of heteroscedasticity. A significant p-value suggests
525/// that the error variance is not constant.
526///
527/// # Arguments
528///
529/// * `y_json` - JSON array of response variable values
530/// * `x_vars_json` - JSON array of predictor arrays
531/// * `method` - Method to use: "r", "python", or "both" (case-insensitive, defaults to "r")
532///
533/// # Returns
534///
535/// JSON string containing test statistic, p-value, and interpretation.
536///
537/// # Errors
538///
539/// Returns a JSON error object if parsing fails or domain check fails.
540#[cfg(feature = "wasm")]
541#[wasm_bindgen]
542pub fn white_test(y_json: &str, x_vars_json: &str, method: &str) -> String {
543 if let Err(e) = check_domain() {
544 return error_to_json(&e);
545 }
546
547 let y: Vec<f64> = match serde_json::from_str(y_json) {
548 Ok(v) => v,
549 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
550 };
551
552 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
553 Ok(v) => v,
554 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
555 };
556
557 // Parse method parameter (default to "r" for R)
558 let method = match method.to_lowercase().as_str() {
559 "python" => diagnostics::WhiteMethod::Python,
560 "both" => diagnostics::WhiteMethod::Both,
561 _ => diagnostics::WhiteMethod::R, // Default to R
562 };
563
564 match diagnostics::white_test(&y, &x_vars, method) {
565 Ok(output) => serde_json::to_string(&output)
566 .unwrap_or_else(|_| error_json("Failed to serialize White test result")),
567 Err(e) => error_json(&e.to_string()),
568 }
569}
570
571/// Performs the R method White test for heteroscedasticity via WASM.
572///
573/// This implementation matches R's `skedastic::white()` function behavior.
574/// Uses the standard QR decomposition and the R-specific auxiliary matrix
575/// structure (intercept, X, X² only - no cross-products).
576///
577/// # Arguments
578///
579/// * `y_json` - JSON array of response variable values
580/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
581///
582/// # Returns
583///
584/// JSON string containing test statistic, p-value, and interpretation.
585///
586/// # Errors
587///
588/// Returns a JSON error object if parsing fails or domain check fails.
589#[cfg(feature = "wasm")]
590#[wasm_bindgen]
591pub fn r_white_test(y_json: &str, x_vars_json: &str) -> String {
592 if let Err(e) = check_domain() {
593 return error_to_json(&e);
594 }
595
596 let y: Vec<f64> = match serde_json::from_str(y_json) {
597 Ok(v) => v,
598 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
599 };
600
601 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
602 Ok(v) => v,
603 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
604 };
605
606 match diagnostics::r_white_method(&y, &x_vars) {
607 Ok(output) => serde_json::to_string(&output)
608 .unwrap_or_else(|_| error_json("Failed to serialize R White test result")),
609 Err(e) => error_json(&e.to_string()),
610 }
611}
612
613/// Performs the Python method White test for heteroscedasticity via WASM.
614///
615/// This implementation matches Python's `statsmodels.stats.diagnostic.het_white()` function.
616/// Uses the LINPACK QR decomposition with column pivoting and the Python-specific
617/// auxiliary matrix structure (intercept, X, X², and cross-products).
618///
619/// # Arguments
620///
621/// * `y_json` - JSON array of response variable values
622/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
623///
624/// # Returns
625///
626/// JSON string containing test statistic, p-value, and interpretation.
627///
628/// # Errors
629///
630/// Returns a JSON error object if parsing fails or domain check fails.
631#[cfg(feature = "wasm")]
632#[wasm_bindgen]
633pub fn python_white_test(y_json: &str, x_vars_json: &str) -> String {
634 if let Err(e) = check_domain() {
635 return error_to_json(&e);
636 }
637
638 let y: Vec<f64> = match serde_json::from_str(y_json) {
639 Ok(v) => v,
640 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
641 };
642
643 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
644 Ok(v) => v,
645 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
646 };
647
648 match diagnostics::python_white_method(&y, &x_vars) {
649 Ok(output) => serde_json::to_string(&output)
650 .unwrap_or_else(|_| error_json("Failed to serialize Python White test result")),
651 Err(e) => error_json(&e.to_string()),
652 }
653}
654
655/// Performs the Jarque-Bera test for normality via WASM.
656///
657/// The Jarque-Bera test checks whether the residuals are normally distributed
658/// by examining skewness and kurtosis. A significant p-value suggests that
659/// the residuals deviate from normality.
660///
661/// # Arguments
662///
663/// * `y_json` - JSON array of response variable values
664/// * `x_vars_json` - JSON array of predictor arrays
665///
666/// # Returns
667///
668/// JSON string containing test statistic, p-value, and interpretation.
669///
670/// # Errors
671///
672/// Returns a JSON error object if parsing fails or domain check fails.
673#[cfg(feature = "wasm")]
674#[wasm_bindgen]
675pub fn jarque_bera_test(y_json: &str, x_vars_json: &str) -> String {
676 if let Err(e) = check_domain() {
677 return error_to_json(&e);
678 }
679
680 let y: Vec<f64> = match serde_json::from_str(y_json) {
681 Ok(v) => v,
682 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
683 };
684
685 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
686 Ok(v) => v,
687 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
688 };
689
690 match diagnostics::jarque_bera_test(&y, &x_vars) {
691 Ok(output) => serde_json::to_string(&output)
692 .unwrap_or_else(|_| error_json("Failed to serialize Jarque-Bera test result")),
693 Err(e) => error_json(&e.to_string()),
694 }
695}
696
697// ============================================================================
698// Durbin-Watson Test (WASM wrapper)
699// ============================================================================
700
701/// Performs the Durbin-Watson test for autocorrelation via WASM.
702///
703/// The Durbin-Watson test checks for autocorrelation in the residuals.
704/// Values near 2 indicate no autocorrelation, values near 0 suggest positive
705/// autocorrelation, and values near 4 suggest negative autocorrelation.
706///
707/// # Arguments
708///
709/// * `y_json` - JSON array of response variable values
710/// * `x_vars_json` - JSON array of predictor arrays
711///
712/// # Returns
713///
714/// JSON string containing the DW statistic and interpretation.
715///
716/// # Errors
717///
718/// Returns a JSON error object if parsing fails or domain check fails.
719#[cfg(feature = "wasm")]
720#[wasm_bindgen]
721pub fn durbin_watson_test(y_json: &str, x_vars_json: &str) -> String {
722 if let Err(e) = check_domain() {
723 return error_to_json(&e);
724 }
725
726 let y: Vec<f64> = match serde_json::from_str(y_json) {
727 Ok(v) => v,
728 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
729 };
730
731 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
732 Ok(v) => v,
733 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
734 };
735
736 match diagnostics::durbin_watson_test(&y, &x_vars) {
737 Ok(output) => serde_json::to_string(&output)
738 .unwrap_or_else(|_| error_json("Failed to serialize Durbin-Watson test result")),
739 Err(e) => error_json(&e.to_string()),
740 }
741}
742
743// ============================================================================
744// Shapiro-Wilk Test (WASM wrapper)
745// ============================================================================
746
747/// Performs the Shapiro-Wilk test for normality via WASM.
748///
749/// The Shapiro-Wilk test is a powerful test for normality,
750/// especially for small to moderate sample sizes (3 ≤ n ≤ 5000). It tests
751/// the null hypothesis that the residuals are normally distributed.
752///
753/// # Arguments
754///
755/// * `y_json` - JSON array of response variable values
756/// * `x_vars_json` - JSON array of predictor arrays
757///
758/// # Returns
759///
760/// JSON string containing the W statistic (ranges from 0 to 1), p-value,
761/// and interpretation.
762///
763/// # Errors
764///
765/// Returns a JSON error object if parsing fails or domain check fails.
766#[cfg(feature = "wasm")]
767#[wasm_bindgen]
768pub fn shapiro_wilk_test(y_json: &str, x_vars_json: &str) -> String {
769 if let Err(e) = check_domain() {
770 return error_to_json(&e);
771 }
772
773 let y: Vec<f64> = match serde_json::from_str(y_json) {
774 Ok(v) => v,
775 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
776 };
777
778 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
779 Ok(v) => v,
780 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
781 };
782
783 match diagnostics::shapiro_wilk_test(&y, &x_vars) {
784 Ok(output) => serde_json::to_string(&output)
785 .unwrap_or_else(|_| error_json("Failed to serialize Shapiro-Wilk test result")),
786 Err(e) => error_json(&e.to_string()),
787 }
788}
789
790/// Performs the Anderson-Darling test for normality via WASM.
791///
792/// The Anderson-Darling test checks whether the residuals are normally distributed
793/// by comparing the empirical distribution to the expected normal distribution.
794/// This test is particularly sensitive to deviations in the tails of the distribution.
795/// A significant p-value suggests that the residuals deviate from normality.
796///
797/// # Arguments
798///
799/// * `y_json` - JSON array of response variable values
800/// * `x_vars_json` - JSON array of predictor arrays
801///
802/// # Returns
803///
804/// JSON string containing the A² statistic, p-value, and interpretation.
805///
806/// # Errors
807///
808/// Returns a JSON error object if parsing fails or domain check fails.
809#[cfg(feature = "wasm")]
810#[wasm_bindgen]
811pub fn anderson_darling_test(y_json: &str, x_vars_json: &str) -> String {
812 if let Err(e) = check_domain() {
813 return error_to_json(&e);
814 }
815
816 let y: Vec<f64> = match serde_json::from_str(y_json) {
817 Ok(v) => v,
818 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
819 };
820
821 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
822 Ok(v) => v,
823 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
824 };
825
826 match diagnostics::anderson_darling_test(&y, &x_vars) {
827 Ok(output) => serde_json::to_string(&output)
828 .unwrap_or_else(|_| error_json("Failed to serialize Anderson-Darling test result")),
829 Err(e) => error_json(&e.to_string()),
830 }
831}
832
833// ============================================================================
834// Cook's Distance (WASM wrapper)
835// ============================================================================
836
837/// Computes Cook's distance for identifying influential observations via WASM.
838///
839/// Cook's distance measures how much each observation influences the regression
840/// model by comparing coefficient estimates with and without that observation.
841/// Unlike hypothesis tests, this is an influence measure - not a test with p-values.
842///
843/// # Arguments
844///
845/// * `y_json` - JSON array of response variable values
846/// * `x_vars_json` - JSON array of predictor arrays
847///
848/// # Returns
849///
850/// JSON string containing:
851/// - Vector of Cook's distances (one per observation)
852/// - Thresholds for identifying influential observations
853/// - Indices of potentially influential observations
854/// - Interpretation and guidance
855///
856/// # Errors
857///
858/// Returns a JSON error object if parsing fails or domain check fails.
859#[cfg(feature = "wasm")]
860#[wasm_bindgen]
861pub fn cooks_distance_test(y_json: &str, x_vars_json: &str) -> String {
862 if let Err(e) = check_domain() {
863 return error_to_json(&e);
864 }
865
866 let y: Vec<f64> = match serde_json::from_str(y_json) {
867 Ok(v) => v,
868 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
869 };
870
871 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
872 Ok(v) => v,
873 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
874 };
875
876 match diagnostics::cooks_distance_test(&y, &x_vars) {
877 Ok(output) => serde_json::to_string(&output)
878 .unwrap_or_else(|_| error_json("Failed to serialize Cook's distance result")),
879 Err(e) => error_json(&e.to_string()),
880 }
881}
882
883/// Performs the RESET test for model specification error via WASM.
884///
885/// The RESET (Regression Specification Error Test) test checks whether the model
886/// is correctly specified by testing if additional terms (powers of fitted values,
887/// regressors, or first principal component) significantly improve the model fit.
888///
889/// # Arguments
890///
891/// * `y_json` - JSON array of response variable values
892/// * `x_vars_json` - JSON array of predictor arrays
893/// * `powers_json` - JSON array of powers to use (e.g., [2, 3] for ŷ², ŷ³)
894/// * `type_` - Type of terms to add: "fitted", "regressor", or "princomp"
895///
896/// # Returns
897///
898/// JSON string containing the F-statistic, p-value, and interpretation.
899///
900/// # Errors
901///
902/// Returns a JSON error object if parsing fails or domain check fails.
903#[cfg(feature = "wasm")]
904#[wasm_bindgen]
905pub fn reset_test(y_json: &str, x_vars_json: &str, powers_json: &str, type_: &str) -> String {
906 if let Err(e) = check_domain() {
907 return error_to_json(&e);
908 }
909
910 let y: Vec<f64> = match serde_json::from_str(y_json) {
911 Ok(v) => v,
912 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
913 };
914
915 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
916 Ok(v) => v,
917 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
918 };
919
920 let powers: Vec<usize> = match serde_json::from_str(powers_json) {
921 Ok(v) => v,
922 Err(e) => return error_json(&format!("Failed to parse powers: {}", e)),
923 };
924
925 // Parse reset type (default to "fitted")
926 let reset_type = match type_.to_lowercase().as_str() {
927 "regressor" => diagnostics::ResetType::Regressor,
928 "princomp" => diagnostics::ResetType::PrincipalComponent,
929 _ => diagnostics::ResetType::Fitted,
930 };
931
932 match diagnostics::reset_test(&y, &x_vars, &powers, reset_type) {
933 Ok(output) => serde_json::to_string(&output)
934 .unwrap_or_else(|_| error_json("Failed to serialize RESET test result")),
935 Err(e) => error_json(&e.to_string()),
936 }
937}
938
939/// Performs the Breusch-Godfrey test for higher-order serial correlation via WASM.
940///
941/// Unlike the Durbin-Watson test which only detects first-order autocorrelation,
942/// the Breusch-Godfrey test can detect serial correlation at any lag order.
943///
944/// # Arguments
945///
946/// * `y_json` - JSON array of response variable values
947/// * `x_vars_json` - JSON array of predictor arrays
948/// * `order` - Maximum order of serial correlation to test (default: 1)
949/// * `test_type` - Type of test statistic: "chisq" or "f" (default: "chisq")
950///
951/// # Returns
952///
953/// JSON string containing test statistic, p-value, degrees of freedom, and interpretation.
954///
955/// # Errors
956///
957/// Returns a JSON error object if parsing fails or domain check fails.
958#[cfg(feature = "wasm")]
959#[wasm_bindgen]
960pub fn breusch_godfrey_test(y_json: &str, x_vars_json: &str, order: usize, test_type: &str) -> String {
961 if let Err(e) = check_domain() {
962 return error_to_json(&e);
963 }
964
965 let y: Vec<f64> = match serde_json::from_str(y_json) {
966 Ok(v) => v,
967 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
968 };
969
970 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
971 Ok(v) => v,
972 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
973 };
974
975 // Parse test type (default to "chisq")
976 let bg_test_type = match test_type.to_lowercase().as_str() {
977 "f" => diagnostics::BGTestType::F,
978 _ => diagnostics::BGTestType::Chisq,
979 };
980
981 match diagnostics::breusch_godfrey_test(&y, &x_vars, order, bg_test_type) {
982 Ok(output) => serde_json::to_string(&output)
983 .unwrap_or_else(|_| error_json("Failed to serialize Breusch-Godfrey test result")),
984 Err(e) => error_json(&e.to_string()),
985 }
986}
987
988// ============================================================================
989// Regularized Regression WASM Wrappers
990// ============================================================================
991
992#[cfg(feature = "wasm")]
993#[wasm_bindgen]
994/// Performs Ridge regression via WASM.
995///
996/// Ridge regression adds an L2 penalty to the coefficients, which helps with
997/// multicollinearity and overfitting. The intercept is never penalized.
998///
999/// # Arguments
1000///
1001/// * `y_json` - JSON array of response variable values
1002/// * `x_vars_json` - JSON array of predictor arrays
1003/// * `variable_names` - JSON array of variable names
1004/// * `lambda` - Regularization strength (>= 0, typical range 0.01 to 100)
1005/// * `standardize` - Whether to standardize predictors (recommended: true)
1006///
1007/// # Returns
1008///
1009/// JSON string containing:
1010/// - `lambda` - Lambda value used
1011/// - `intercept` - Intercept coefficient
1012/// - `coefficients` - Slope coefficients
1013/// - `fitted_values` - Predictions on training data
1014/// - `residuals` - Residuals (y - fitted_values)
1015/// - `df` - Effective degrees of freedom
1016///
1017/// # Errors
1018///
1019/// Returns a JSON error object if parsing fails, lambda is negative,
1020/// or domain check fails.
1021pub fn ridge_regression(
1022 y_json: &str,
1023 x_vars_json: &str,
1024 _variable_names: &str,
1025 lambda: f64,
1026 standardize: bool,
1027) -> String {
1028 if let Err(e) = check_domain() {
1029 return error_to_json(&e);
1030 }
1031
1032 // Parse JSON input
1033 let y: Vec<f64> = match serde_json::from_str(y_json) {
1034 Ok(v) => v,
1035 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1036 };
1037
1038 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1039 Ok(v) => v,
1040 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1041 };
1042
1043 // Build design matrix with intercept column
1044 let n = y.len();
1045 let p = x_vars.len();
1046
1047 if n <= p + 1 {
1048 return error_json(&format!(
1049 "Insufficient data: need at least {} observations for {} predictors",
1050 p + 2,
1051 p
1052 ));
1053 }
1054
1055 let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1056 for (j, x_var) in x_vars.iter().enumerate() {
1057 if x_var.len() != n {
1058 return error_json(&format!(
1059 "x_vars[{}] has {} elements, expected {}",
1060 j,
1061 x_var.len(),
1062 n
1063 ));
1064 }
1065 for (i, &val) in x_var.iter().enumerate() {
1066 x_data[i * (p + 1) + j + 1] = val;
1067 }
1068 }
1069
1070 let x = linalg::Matrix::new(n, p + 1, x_data);
1071
1072 // Configure ridge options
1073 let options = regularized::ridge::RidgeFitOptions {
1074 lambda,
1075 intercept: true,
1076 standardize,
1077 max_iter: 100000,
1078 tol: 1e-7,
1079 warm_start: None,
1080 weights: None,
1081 };
1082
1083 match regularized::ridge::ridge_fit(&x, &y, &options) {
1084 Ok(output) => serde_json::to_string(&output)
1085 .unwrap_or_else(|_| error_json("Failed to serialize ridge regression result")),
1086 Err(e) => error_json(&e.to_string()),
1087 }
1088}
1089
1090#[cfg(feature = "wasm")]
1091#[wasm_bindgen]
1092/// Performs Lasso regression via WASM.
1093///
1094/// Lasso regression adds an L1 penalty to the coefficients, which performs
1095/// automatic variable selection by shrinking some coefficients to exactly zero.
1096/// The intercept is never penalized.
1097///
1098/// # Arguments
1099///
1100/// * `y_json` - JSON array of response variable values
1101/// * `x_vars_json` - JSON array of predictor arrays
1102/// * `variable_names` - JSON array of variable names
1103/// * `lambda` - Regularization strength (>= 0, typical range 0.01 to 10)
1104/// * `standardize` - Whether to standardize predictors (recommended: true)
1105/// * `max_iter` - Maximum coordinate descent iterations (default: 100000)
1106/// * `tol` - Convergence tolerance (default: 1e-7)
1107///
1108/// # Returns
1109///
1110/// JSON string containing:
1111/// - `lambda` - Lambda value used
1112/// - `intercept` - Intercept coefficient
1113/// - `coefficients` - Slope coefficients (some may be exactly zero)
1114/// - `fitted_values` - Predictions on training data
1115/// - `residuals` - Residuals (y - fitted_values)
1116/// - `n_nonzero` - Number of non-zero coefficients (excluding intercept)
1117/// - `iterations` - Number of coordinate descent iterations
1118/// - `converged` - Whether the algorithm converged
1119///
1120/// # Errors
1121///
1122/// Returns a JSON error object if parsing fails, lambda is negative,
1123/// or domain check fails.
1124pub fn lasso_regression(
1125 y_json: &str,
1126 x_vars_json: &str,
1127 _variable_names: &str,
1128 lambda: f64,
1129 standardize: bool,
1130 max_iter: usize,
1131 tol: f64,
1132) -> String {
1133 if let Err(e) = check_domain() {
1134 return error_to_json(&e);
1135 }
1136
1137 // Parse JSON input
1138 let y: Vec<f64> = match serde_json::from_str(y_json) {
1139 Ok(v) => v,
1140 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1141 };
1142
1143 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1144 Ok(v) => v,
1145 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1146 };
1147
1148 // Build design matrix with intercept column
1149 let n = y.len();
1150 let p = x_vars.len();
1151
1152 if n <= p + 1 {
1153 return error_json(&format!(
1154 "Insufficient data: need at least {} observations for {} predictors",
1155 p + 2,
1156 p
1157 ));
1158 }
1159
1160 let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1161 for (j, x_var) in x_vars.iter().enumerate() {
1162 if x_var.len() != n {
1163 return error_json(&format!(
1164 "x_vars[{}] has {} elements, expected {}",
1165 j,
1166 x_var.len(),
1167 n
1168 ));
1169 }
1170 for (i, &val) in x_var.iter().enumerate() {
1171 x_data[i * (p + 1) + j + 1] = val;
1172 }
1173 }
1174
1175 let x = linalg::Matrix::new(n, p + 1, x_data);
1176
1177 // Configure lasso options
1178 let options = regularized::lasso::LassoFitOptions {
1179 lambda,
1180 intercept: true,
1181 standardize,
1182 max_iter,
1183 tol,
1184 ..Default::default()
1185 };
1186
1187 match regularized::lasso::lasso_fit(&x, &y, &options) {
1188 Ok(output) => serde_json::to_string(&output)
1189 .unwrap_or_else(|_| error_json("Failed to serialize lasso regression result")),
1190 Err(e) => error_json(&e.to_string()),
1191 }
1192}
1193
1194#[cfg(feature = "wasm")]
1195#[wasm_bindgen]
1196#[allow(clippy::too_many_arguments)]
1197/// Performs Elastic Net regression via WASM.
1198///
1199/// Elastic Net combines L1 (Lasso) and L2 (Ridge) penalties.
1200///
1201/// # Arguments
1202///
1203/// * `y_json` - JSON array of response variable values
1204/// * `x_vars_json` - JSON array of predictor arrays
1205/// * `variable_names` - JSON array of variable names
1206/// * `lambda` - Regularization strength (>= 0)
1207/// * `alpha` - Elastic net mixing parameter (0 = Ridge, 1 = Lasso)
1208/// * `standardize` - Whether to standardize predictors (recommended: true)
1209/// * `max_iter` - Maximum coordinate descent iterations
1210/// * `tol` - Convergence tolerance
1211///
1212/// # Returns
1213///
1214/// JSON string containing regression results (same fields as Lasso).
1215///
1216/// # Errors
1217///
1218/// Returns a JSON error object if parsing fails, parameters are invalid,
1219/// or domain check fails.
1220pub fn elastic_net_regression(
1221 y_json: &str,
1222 x_vars_json: &str,
1223 _variable_names: &str,
1224 lambda: f64,
1225 alpha: f64,
1226 standardize: bool,
1227 max_iter: usize,
1228 tol: f64,
1229) -> String {
1230 if let Err(e) = check_domain() {
1231 return error_to_json(&e);
1232 }
1233
1234 // Parse JSON input
1235 let y: Vec<f64> = match serde_json::from_str(y_json) {
1236 Ok(v) => v,
1237 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1238 };
1239
1240 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1241 Ok(v) => v,
1242 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1243 };
1244
1245 // Build design matrix with intercept column
1246 let n = y.len();
1247 let p = x_vars.len();
1248
1249 if n <= p + 1 {
1250 return error_json(&format!(
1251 "Insufficient data: need at least {} observations for {} predictors",
1252 p + 2,
1253 p
1254 ));
1255 }
1256
1257 let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1258 for (j, x_var) in x_vars.iter().enumerate() {
1259 if x_var.len() != n {
1260 return error_json(&format!(
1261 "x_vars[{}] has {} elements, expected {}",
1262 j,
1263 x_var.len(),
1264 n
1265 ));
1266 }
1267 for (i, &val) in x_var.iter().enumerate() {
1268 x_data[i * (p + 1) + j + 1] = val;
1269 }
1270 }
1271
1272 let x = linalg::Matrix::new(n, p + 1, x_data);
1273
1274 // Configure elastic net options
1275 let options = regularized::elastic_net::ElasticNetOptions {
1276 lambda,
1277 alpha,
1278 intercept: true,
1279 standardize,
1280 max_iter,
1281 tol,
1282 ..Default::default()
1283 };
1284
1285 match regularized::elastic_net::elastic_net_fit(&x, &y, &options) {
1286 Ok(output) => serde_json::to_string(&output)
1287 .unwrap_or_else(|_| error_json("Failed to serialize elastic net regression result")),
1288 Err(e) => error_json(&e.to_string()),
1289 }
1290}
1291
1292#[cfg(feature = "wasm")]
1293#[wasm_bindgen]
1294/// Generates a lambda path for regularized regression via WASM.
1295///
1296/// Creates a logarithmically-spaced sequence of lambda values from lambda_max
1297/// (where all penalized coefficients are zero) down to lambda_min. This is
1298/// useful for fitting regularization paths and selecting optimal lambda via
1299/// cross-validation.
1300///
1301/// # Arguments
1302///
1303/// * `y_json` - JSON array of response variable values
1304/// * `x_vars_json` - JSON array of predictor arrays
1305/// * `n_lambda` - Number of lambda values to generate (default: 100)
1306/// * `lambda_min_ratio` - Ratio for smallest lambda (default: 0.0001 if n >= p, else 0.01)
1307///
1308/// # Returns
1309///
1310/// JSON string containing:
1311/// - `lambda_path` - Array of lambda values in decreasing order
1312/// - `lambda_max` - Maximum lambda value
1313/// - `lambda_min` - Minimum lambda value
1314/// - `n_lambda` - Number of lambda values
1315///
1316/// # Errors
1317///
1318/// Returns a JSON error object if parsing fails or domain check fails.
1319pub fn make_lambda_path(
1320 y_json: &str,
1321 x_vars_json: &str,
1322 n_lambda: usize,
1323 lambda_min_ratio: f64,
1324) -> String {
1325 if let Err(e) = check_domain() {
1326 return error_to_json(&e);
1327 }
1328
1329 // Parse JSON input
1330 let y: Vec<f64> = match serde_json::from_str(y_json) {
1331 Ok(v) => v,
1332 Err(e) => return error_json(&format!("Failed to parse y: {}", e)),
1333 };
1334
1335 let x_vars: Vec<Vec<f64>> = match serde_json::from_str(x_vars_json) {
1336 Ok(v) => v,
1337 Err(e) => return error_json(&format!("Failed to parse x_vars: {}", e)),
1338 };
1339
1340 // Build design matrix with intercept column
1341 let n = y.len();
1342 let p = x_vars.len();
1343
1344 let mut x_data = vec![1.0; n * (p + 1)]; // Intercept column
1345 for (j, x_var) in x_vars.iter().enumerate() {
1346 if x_var.len() != n {
1347 return error_json(&format!(
1348 "x_vars[{}] has {} elements, expected {}",
1349 j,
1350 x_var.len(),
1351 n
1352 ));
1353 }
1354 for (i, &val) in x_var.iter().enumerate() {
1355 x_data[i * (p + 1) + j + 1] = val;
1356 }
1357 }
1358
1359 let x = linalg::Matrix::new(n, p + 1, x_data);
1360
1361 // Standardize X for lambda path computation
1362 let x_mean: Vec<f64> = (0..x.cols)
1363 .map(|j| {
1364 if j == 0 {
1365 1.0 // Intercept column
1366 } else {
1367 (0..n).map(|i| x.get(i, j)).sum::<f64>() / n as f64
1368 }
1369 })
1370 .collect();
1371
1372 let x_standardized: Vec<f64> = (0..x.cols)
1373 .map(|j| {
1374 if j == 0 {
1375 0.0 // Intercept column - no centering
1376 } else {
1377 let mean = x_mean[j];
1378 let variance =
1379 (0..n).map(|i| (x.get(i, j) - mean).powi(2)).sum::<f64>() / (n - 1) as f64;
1380 variance.sqrt()
1381 }
1382 })
1383 .collect();
1384
1385 // Build standardized X matrix
1386 let mut x_standardized_data = vec![1.0; n * (p + 1)];
1387 for j in 0..x.cols {
1388 for i in 0..n {
1389 if j == 0 {
1390 x_standardized_data[i * (p + 1)] = 1.0; // Intercept
1391 } else {
1392 let std = x_standardized[j];
1393 if std > 1e-10 {
1394 x_standardized_data[i * (p + 1) + j] = (x.get(i, j) - x_mean[j]) / std;
1395 } else {
1396 x_standardized_data[i * (p + 1) + j] = 0.0;
1397 }
1398 }
1399 }
1400 }
1401 let x_standardized = linalg::Matrix::new(n, p + 1, x_standardized_data);
1402
1403 // Center y
1404 let y_mean: f64 = y.iter().sum::<f64>() / n as f64;
1405 let y_centered: Vec<f64> = y.iter().map(|&yi| yi - y_mean).collect();
1406
1407 // Configure lambda path options
1408 let options = regularized::path::LambdaPathOptions {
1409 nlambda: n_lambda.max(1),
1410 lambda_min_ratio: if lambda_min_ratio > 0.0 {
1411 Some(lambda_min_ratio)
1412 } else {
1413 None
1414 },
1415 alpha: 1.0, // Lasso
1416 ..Default::default()
1417 };
1418
1419 let lambda_path =
1420 regularized::path::make_lambda_path(&x_standardized, &y_centered, &options, None, Some(0));
1421
1422 let lambda_max = lambda_path.first().copied().unwrap_or(0.0);
1423 let lambda_min = lambda_path.last().copied().unwrap_or(0.0);
1424
1425 // Return as JSON (note: infinity serializes as null in JSON, handled in JS)
1426 let result = serde_json::json!({
1427 "lambda_path": lambda_path,
1428 "lambda_max": lambda_max,
1429 "lambda_min": lambda_min,
1430 "n_lambda": lambda_path.len()
1431 });
1432
1433 result.to_string()
1434}
1435
1436// ============================================================================
1437// Statistical Utility Functions (WASM wrappers)
1438// ============================================================================
1439
1440#[cfg(feature = "wasm")]
1441#[wasm_bindgen]
1442/// Computes the Student's t-distribution cumulative distribution function.
1443///
1444/// Returns P(T ≤ t) for a t-distribution with the given degrees of freedom.
1445///
1446/// # Arguments
1447///
1448/// * `t` - t-statistic value
1449/// * `df` - Degrees of freedom
1450///
1451/// # Returns
1452///
1453/// The CDF value, or `NaN` if domain check fails.
1454pub fn get_t_cdf(t: f64, df: f64) -> f64 {
1455 if check_domain().is_err() {
1456 return f64::NAN;
1457 }
1458
1459 student_t_cdf(t, df)
1460}
1461
1462#[cfg(feature = "wasm")]
1463#[wasm_bindgen]
1464/// Computes the critical t-value for a given significance level.
1465///
1466/// Returns the t-value such that the area under the t-distribution curve
1467/// to the right equals alpha/2 (two-tailed test).
1468///
1469/// # Arguments
1470///
1471/// * `alpha` - Significance level (typically 0.05 for 95% confidence)
1472/// * `df` - Degrees of freedom
1473///
1474/// # Returns
1475///
1476/// The critical t-value, or `NaN` if domain check fails.
1477pub fn get_t_critical(alpha: f64, df: f64) -> f64 {
1478 if check_domain().is_err() {
1479 return f64::NAN;
1480 }
1481
1482 core::t_critical_quantile(df, alpha)
1483}
1484
1485#[cfg(feature = "wasm")]
1486#[wasm_bindgen]
1487/// Computes the inverse of the standard normal CDF (probit function).
1488///
1489/// Returns the z-score such that P(Z ≤ z) = p for a standard normal distribution.
1490///
1491/// # Arguments
1492///
1493/// * `p` - Probability (0 < p < 1)
1494///
1495/// # Returns
1496///
1497/// The z-score, or `NaN` if domain check fails.
1498pub fn get_normal_inverse(p: f64) -> f64 {
1499 if check_domain().is_err() {
1500 return f64::NAN;
1501 }
1502
1503 normal_inverse_cdf(p)
1504}
1505
1506// ============================================================================
1507// Statistical Utilities (WASM-only)
1508// ============================================================================
1509
1510#[cfg(feature = "wasm")]
1511#[wasm_bindgen]
1512/// Computes the arithmetic mean of a JSON array of f64 values.
1513///
1514/// # Arguments
1515///
1516/// * `data_json` - JSON string representing an array of f64 values
1517///
1518/// # Returns
1519///
1520/// JSON string with the mean, or "null" if input is invalid/empty
1521pub fn stats_mean(data_json: String) -> String {
1522 if check_domain().is_err() {
1523 return "null".to_string();
1524 }
1525
1526 let data: Vec<f64> = match serde_json::from_str(&data_json) {
1527 Ok(d) => d,
1528 Err(_) => return "null".to_string(),
1529 };
1530
1531 serde_json::to_string(&stats::mean(&data)).unwrap_or("null".to_string())
1532}
1533
1534#[cfg(feature = "wasm")]
1535#[wasm_bindgen]
1536/// Computes the sample standard deviation of a JSON array of f64 values.
1537///
1538/// Uses the (n-1) denominator for unbiased estimation.
1539///
1540/// # Arguments
1541///
1542/// * `data_json` - JSON string representing an array of f64 values
1543///
1544/// # Returns
1545///
1546/// JSON string with the standard deviation, or "null" if input is invalid
1547pub fn stats_stddev(data_json: String) -> String {
1548 if check_domain().is_err() {
1549 return "null".to_string();
1550 }
1551
1552 let data: Vec<f64> = match serde_json::from_str(&data_json) {
1553 Ok(d) => d,
1554 Err(_) => return "null".to_string(),
1555 };
1556
1557 serde_json::to_string(&stats::stddev(&data)).unwrap_or("null".to_string())
1558}
1559
1560#[cfg(feature = "wasm")]
1561#[wasm_bindgen]
1562/// Computes the sample variance of a JSON array of f64 values.
1563///
1564/// Uses the (n-1) denominator for unbiased estimation.
1565///
1566/// # Arguments
1567///
1568/// * `data_json` - JSON string representing an array of f64 values
1569///
1570/// # Returns
1571///
1572/// JSON string with the variance, or "null" if input is invalid
1573pub fn stats_variance(data_json: String) -> String {
1574 if check_domain().is_err() {
1575 return "null".to_string();
1576 }
1577
1578 let data: Vec<f64> = match serde_json::from_str(&data_json) {
1579 Ok(d) => d,
1580 Err(_) => return "null".to_string(),
1581 };
1582
1583 serde_json::to_string(&stats::variance(&data)).unwrap_or("null".to_string())
1584}
1585
1586#[cfg(feature = "wasm")]
1587#[wasm_bindgen]
1588/// Computes the median of a JSON array of f64 values.
1589///
1590/// # Arguments
1591///
1592/// * `data_json` - JSON string representing an array of f64 values
1593///
1594/// # Returns
1595///
1596/// JSON string with the median, or "null" if input is invalid/empty
1597pub fn stats_median(data_json: String) -> String {
1598 if check_domain().is_err() {
1599 return "null".to_string();
1600 }
1601
1602 let data: Vec<f64> = match serde_json::from_str(&data_json) {
1603 Ok(d) => d,
1604 Err(_) => return "null".to_string(),
1605 };
1606
1607 serde_json::to_string(&stats::median(&data)).unwrap_or("null".to_string())
1608}
1609
1610#[cfg(feature = "wasm")]
1611#[wasm_bindgen]
1612/// Computes a quantile of a JSON array of f64 values.
1613///
1614/// # Arguments
1615///
1616/// * `data_json` - JSON string representing an array of f64 values
1617/// * `q` - Quantile to calculate (0.0 to 1.0)
1618///
1619/// # Returns
1620///
1621/// JSON string with the quantile value, or "null" if input is invalid
1622pub fn stats_quantile(data_json: String, q: f64) -> String {
1623 if check_domain().is_err() {
1624 return "null".to_string();
1625 }
1626
1627 let data: Vec<f64> = match serde_json::from_str(&data_json) {
1628 Ok(d) => d,
1629 Err(_) => return "null".to_string(),
1630 };
1631
1632 serde_json::to_string(&stats::quantile(&data, q)).unwrap_or("null".to_string())
1633}
1634
1635#[cfg(feature = "wasm")]
1636#[wasm_bindgen]
1637/// Computes the correlation coefficient between two JSON arrays of f64 values.
1638///
1639/// # Arguments
1640///
1641/// * `x_json` - JSON string representing the first array of f64 values
1642/// * `y_json` - JSON string representing the second array of f64 values
1643///
1644/// # Returns
1645///
1646/// JSON string with the correlation coefficient, or "null" if input is invalid
1647pub fn stats_correlation(x_json: String, y_json: String) -> String {
1648 if check_domain().is_err() {
1649 return "null".to_string();
1650 }
1651
1652 let x: Vec<f64> = match serde_json::from_str(&x_json) {
1653 Ok(d) => d,
1654 Err(_) => return "null".to_string(),
1655 };
1656
1657 let y: Vec<f64> = match serde_json::from_str(&y_json) {
1658 Ok(d) => d,
1659 Err(_) => return "null".to_string(),
1660 };
1661
1662 serde_json::to_string(&stats::correlation(&x, &y)).unwrap_or("null".to_string())
1663}
1664
1665// ============================================================================
1666// Domain Check (WASM-only)
1667// ============================================================================
1668//
1669// By default, all domains are allowed. To enable domain restriction, set the
1670// LINREG_DOMAIN_RESTRICT environment variable at build time:
1671//
1672// LINREG_DOMAIN_RESTRICT=example.com,yoursite.com wasm-pack build
1673//
1674// Example for jesse-anderson.net:
1675// LINREG_DOMAIN_RESTRICT=jesse-anderson.net,tools.jesse-anderson.net,localhost,127.0.0.1 wasm-pack build
1676//
1677// This allows downstream users to use the library without modification while
1678// still providing domain restriction as an opt-in security feature.
1679
1680#[cfg(feature = "wasm")]
1681fn check_domain() -> Result<()> {
1682 // Read allowed domains from build-time environment variable
1683 let allowed_domains = option_env!("LINREG_DOMAIN_RESTRICT");
1684
1685 match allowed_domains {
1686 Some(domains) if !domains.is_empty() => {
1687 // Domain restriction is enabled
1688 let window =
1689 web_sys::window().ok_or(Error::DomainCheck("No window found".to_string()))?;
1690 let location = window.location();
1691 let hostname = location
1692 .hostname()
1693 .map_err(|_| Error::DomainCheck("No hostname found".to_string()))?;
1694
1695 let domain_list: Vec<&str> = domains.split(',').map(|s| s.trim()).collect();
1696
1697 if domain_list.contains(&hostname.as_str()) {
1698 Ok(())
1699 } else {
1700 Err(Error::DomainCheck(format!(
1701 "Unauthorized domain: {}. Allowed: {}",
1702 hostname, domains
1703 )))
1704 }
1705 },
1706 _ => {
1707 // No restriction - allow all domains
1708 Ok(())
1709 },
1710 }
1711}
1712
1713// ============================================================================
1714// Test Functions (WASM-only)
1715// ============================================================================
1716
1717#[cfg(test)]
1718mod tests {
1719 use super::*;
1720
1721 #[test]
1722 fn verify_housing_regression_integrity() {
1723 let result = test_housing_regression_native();
1724 if let Err(e) = result {
1725 panic!("Regression test failed: {}", e);
1726 }
1727 }
1728
1729 /// Test that test_housing_regression_native produces valid JSON
1730 #[test]
1731 fn test_housing_regression_json_output() {
1732 let result = test_housing_regression_native().unwrap();
1733 // Should be valid JSON
1734 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
1735 // Should have status field
1736 assert!(parsed.get("status").is_some());
1737 // Status should be PASS (we control the test data)
1738 assert_eq!(parsed["status"], "PASS");
1739 }
1740
1741 /// Test housing regression with actual R reference values
1742 #[test]
1743 fn test_housing_regression_coefficients() {
1744 let y = vec![
1745 245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9,
1746 367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7,
1747 267.9,
1748 ];
1749
1750 let square_feet = vec![
1751 1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
1752 2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
1753 1250.0, 1700.0, 850.0, 2350.0, 1400.0,
1754 ];
1755 let bedrooms = vec![
1756 3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0,
1757 3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
1758 ];
1759 let age = vec![
1760 15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0,
1761 22.0, 1.0, 16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
1762 ];
1763
1764 let x_vars = vec![square_feet, bedrooms, age];
1765 let names = vec![
1766 "Intercept".to_string(),
1767 "Square_Feet".to_string(),
1768 "Bedrooms".to_string(),
1769 "Age".to_string(),
1770 ];
1771
1772 let result = core::ols_regression(&y, &x_vars, &names).unwrap();
1773
1774 // Check against R results
1775 let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
1776 let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];
1777
1778 let tolerance = 1e-4;
1779 for i in 0..4 {
1780 assert!(
1781 (result.coefficients[i] - expected_coeffs[i]).abs() < tolerance,
1782 "coeff[{}] differs: got {}, expected {}",
1783 i,
1784 result.coefficients[i],
1785 expected_coeffs[i]
1786 );
1787 assert!(
1788 (result.std_errors[i] - expected_std_errs[i]).abs() < tolerance,
1789 "std_err[{}] differs: got {}, expected {}",
1790 i,
1791 result.std_errors[i],
1792 expected_std_errs[i]
1793 );
1794 }
1795 }
1796
1797 /// Test R-squared calculation in housing regression
1798 #[test]
1799 fn test_housing_regression_r_squared() {
1800 let result = test_housing_regression_native().unwrap();
1801 let parsed: serde_json::Value = serde_json::from_str(&result).unwrap();
1802
1803 // If status is PASS, R² should be reasonable (between 0 and 1)
1804 assert_eq!(parsed["status"], "PASS");
1805 }
1806
1807 /// Test that housing regression handles all expected output fields
1808 #[test]
1809 fn test_housing_regression_comprehensive() {
1810 let y = vec![
1811 245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1,
1812 ];
1813 let x1 = vec![1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0];
1814 let x2 = vec![3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0];
1815
1816 let result = core::ols_regression(&y, &[x1, x2], &["Intercept".into(), "X1".into(), "X2".into()])
1817 .unwrap();
1818
1819 // Verify expected output fields exist
1820 assert!(!result.coefficients.is_empty());
1821 assert!(!result.std_errors.is_empty());
1822 assert!(!result.t_stats.is_empty());
1823 assert!(!result.p_values.is_empty());
1824 assert!(result.r_squared >= 0.0 && result.r_squared <= 1.0);
1825 assert!(result.residuals.len() == y.len());
1826 }
1827
1828 /// Test error handling when insufficient data is provided
1829 #[test]
1830 fn test_housing_regression_insufficient_data() {
1831 let y = vec![245.5, 312.8]; // Only 2 observations
1832 let x1 = vec![1200.0, 1800.0];
1833 let x2 = vec![3.0, 4.0];
1834
1835 let result = core::ols_regression(&y, &[x1, x2], &["Intercept".into(), "X1".into(), "X2".into()]);
1836 assert!(result.is_err());
1837 }
1838
1839 /// Test housing regression precision with tolerance check
1840 #[test]
1841 fn test_housing_regression_tolerance_check() {
1842 let y = vec![
1843 245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9,
1844 367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7,
1845 267.9,
1846 ];
1847
1848 let square_feet = vec![
1849 1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
1850 2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
1851 1250.0, 1700.0, 850.0, 2350.0, 1400.0,
1852 ];
1853 let bedrooms = vec![
1854 3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0,
1855 3.0, 4.0, 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
1856 ];
1857 let age = vec![
1858 15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0,
1859 22.0, 1.0, 16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
1860 ];
1861
1862 let x_vars = vec![square_feet, bedrooms, age];
1863 let names = vec![
1864 "Intercept".to_string(),
1865 "Square_Feet".to_string(),
1866 "Bedrooms".to_string(),
1867 "Age".to_string(),
1868 ];
1869
1870 let result = core::ols_regression(&y, &x_vars, &names).unwrap();
1871
1872 // Verify all coefficient values are finite
1873 for coef in &result.coefficients {
1874 assert!(coef.is_finite(), "Coefficient should be finite");
1875 }
1876 // Verify all standard errors are positive and finite
1877 for se in &result.std_errors {
1878 assert!(se.is_finite(), "Standard error should be finite");
1879 if *se <= 0.0 {
1880 panic!("Standard error should be positive, got {}", se);
1881 }
1882 }
1883 }
1884}
1885
1886#[cfg(feature = "wasm")]
1887#[wasm_bindgen]
1888/// Simple test function to verify WASM is working.
1889///
1890/// Returns a success message confirming the WASM module loaded correctly.
1891///
1892/// # Errors
1893///
1894/// Returns a JSON error object if domain check fails.
1895pub fn test() -> String {
1896 if let Err(e) = check_domain() {
1897 return error_to_json(&e);
1898 }
1899 "Rust WASM is working!".to_string()
1900}
1901
1902#[cfg(feature = "wasm")]
1903#[wasm_bindgen]
1904/// Returns the current version of the library.
1905///
1906/// Returns the Cargo package version as a string (e.g., "0.1.0").
1907///
1908/// # Errors
1909///
1910/// Returns a JSON error object if domain check fails.
1911pub fn get_version() -> String {
1912 if let Err(e) = check_domain() {
1913 return error_to_json(&e);
1914 }
1915 env!("CARGO_PKG_VERSION").to_string()
1916}
1917
1918#[cfg(feature = "wasm")]
1919#[wasm_bindgen]
1920/// Test function for t-critical value computation.
1921///
1922/// Returns JSON with the computed t-critical value for the given parameters.
1923///
1924/// # Errors
1925///
1926/// Returns a JSON error object if domain check fails.
1927pub fn test_t_critical(df: f64, alpha: f64) -> String {
1928 if let Err(e) = check_domain() {
1929 return error_to_json(&e);
1930 }
1931 let t_crit = core::t_critical_quantile(df, alpha);
1932 format!(
1933 r#"{{"df": {}, "alpha": {}, "t_critical": {}}}"#,
1934 df, alpha, t_crit
1935 )
1936}
1937
1938#[cfg(feature = "wasm")]
1939#[wasm_bindgen]
1940/// Test function for confidence interval computation.
1941///
1942/// Returns JSON with the computed confidence interval for a coefficient.
1943///
1944/// # Errors
1945///
1946/// Returns a JSON error object if domain check fails.
1947pub fn test_ci(coef: f64, se: f64, df: f64, alpha: f64) -> String {
1948 if let Err(e) = check_domain() {
1949 return error_to_json(&e);
1950 }
1951 let t_crit = core::t_critical_quantile(df, alpha);
1952 format!(
1953 r#"{{"lower": {}, "upper": {}}}"#,
1954 coef - t_crit * se,
1955 coef + t_crit * se
1956 )
1957}
1958
1959#[cfg(feature = "wasm")]
1960#[wasm_bindgen]
1961/// Test function for R accuracy validation.
1962///
1963/// Returns JSON comparing our statistical functions against R reference values.
1964///
1965/// # Errors
1966///
1967/// Returns a JSON error object if domain check fails.
1968pub fn test_r_accuracy() -> String {
1969 if let Err(e) = check_domain() {
1970 return error_to_json(&e);
1971 }
1972 format!(
1973 r#"{{"two_tail_p": {}, "qt_975": {}}}"#,
1974 core::two_tailed_p_value(1.6717, 21.0),
1975 core::t_critical_quantile(21.0, 0.05)
1976 )
1977}
1978
1979#[cfg(feature = "wasm")]
1980#[wasm_bindgen]
1981/// Test function for regression validation against R reference values.
1982///
1983/// Runs a regression on a housing dataset and compares results against R's lm() output.
1984/// Returns JSON with status "PASS" or "FAIL" with details.
1985///
1986/// # Errors
1987///
1988/// Returns a JSON error object if domain check fails.
1989pub fn test_housing_regression() -> String {
1990 if let Err(e) = check_domain() {
1991 return error_to_json(&e);
1992 }
1993
1994 match test_housing_regression_native() {
1995 Ok(result) => result,
1996 Err(e) => serde_json::json!({ "status": "ERROR", "error": e.to_string() }).to_string(),
1997 }
1998}
1999
2000// Native Rust test function (works without WASM feature)
2001#[cfg(any(test, feature = "wasm"))]
2002fn test_housing_regression_native() -> Result<String> {
2003 let y = vec![
2004 245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1, 445.8, 167.9, 367.4,
2005 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9, 223.4, 312.5, 156.8, 423.7, 267.9,
2006 ];
2007
2008 let square_feet = vec![
2009 1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0, 2200.0,
2010 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0, 1250.0, 1700.0,
2011 850.0, 2350.0, 1400.0,
2012 ];
2013 let bedrooms = vec![
2014 3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0, 4.0, 2.0, 4.0, 3.0, 2.0, 5.0, 3.0, 4.0,
2015 2.0, 4.0, 3.0, 3.0, 2.0, 4.0, 3.0,
2016 ];
2017 let age = vec![
2018 15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0, 3.0, 30.0, 6.0, 14.0, 22.0, 1.0,
2019 16.0, 9.0, 28.0, 4.0, 19.0, 11.0, 35.0, 3.0, 13.0,
2020 ];
2021
2022 let x_vars = vec![square_feet, bedrooms, age];
2023 let names = vec![
2024 "Intercept".to_string(),
2025 "Square_Feet".to_string(),
2026 "Bedrooms".to_string(),
2027 "Age".to_string(),
2028 ];
2029
2030 let result = core::ols_regression(&y, &x_vars, &names)?;
2031
2032 // Check against R results
2033 let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
2034 let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];
2035
2036 let tolerance = 1e-4;
2037 let mut mismatches = vec![];
2038
2039 for i in 0..4 {
2040 if (result.coefficients[i] - expected_coeffs[i]).abs() > tolerance {
2041 mismatches.push(format!(
2042 "coeff[{}] differs: got {}, expected {}",
2043 i, result.coefficients[i], expected_coeffs[i]
2044 ));
2045 }
2046 if (result.std_errors[i] - expected_std_errs[i]).abs() > tolerance {
2047 mismatches.push(format!(
2048 "std_err[{}] differs: got {}, expected {}",
2049 i, result.std_errors[i], expected_std_errs[i]
2050 ));
2051 }
2052 }
2053
2054 if mismatches.is_empty() {
2055 Ok(serde_json::json!({ "status": "PASS" }).to_string())
2056 } else {
2057 Ok(serde_json::json!({ "status": "FAIL", "mismatches": mismatches }).to_string())
2058 }
2059}