linreg_core/lib.rs
1//! Linear Regression Library with WASM bindings.
2//!
3//! This library provides Ordinary Least Squares (OLS) regression and
4//! statistical diagnostic tests. When the `wasm` feature is enabled,
5//! it exposes JavaScript bindings for use in web browsers.
6//!
7//! # Features
8//!
9//! - OLS regression with QR decomposition for numerical stability
10//! - Comprehensive diagnostic tests (Rainbow, Harvey-Collier, Breusch-Pagan, White, Jarque-Bera, Durbin-Watson, Shapiro-Wilk, Anderson-Darling)
11//! - Variance Inflation Factor (VIF) analysis
12//! - Custom implementations of all linear algebra and statistical functions
13//!
14//! # Module Structure
15//!
16//! - [`core`] - OLS regression implementation
17//! - [`diagnostics`] - Statistical diagnostic tests
18//! - [`distributions`] - Statistical distributions (t, F, chi-squared, normal)
19//! - [`linalg`] - Matrix operations and QR decomposition
20//! - [`error`] - Error types
21//!
22//! # WASM API
23//!
24//! When the `wasm` feature is enabled, the library exposes JavaScript bindings that
25//! accept and return JSON strings for easy integration. By default, all domains are
26//! allowed. To restrict usage to specific domains, set the `LINREG_DOMAIN_RESTRICT`
27//! environment variable at build time:
28//!
29//! ```bash
30//! LINREG_DOMAIN_RESTRICT=example.com,mysite.com wasm-pack build
31//! ```
32//!
33//! # Example (Native Rust)
34//!
35//! ```
36//! use linreg_core::core::ols_regression;
37//! use linreg_core::Error;
38//!
39//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3, 7.0];
40//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
41//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0, 2.0];
42//! let names = vec![
43//! "Intercept".to_string(),
44//! "Temperature".to_string(),
45//! "Pressure".to_string(),
46//! ];
47//!
48//! let result = ols_regression(&y, &[x1, x2], &names)?;
49//! println!("R²: {}", result.r_squared);
50//! # Ok::<(), Error>(())
51//! ```
52
53// Import core modules (always available)
54pub mod core;
55pub mod diagnostics;
56pub mod distributions;
57pub mod linalg;
58pub mod error;
59
60// Unit tests are now in tests/unit/ directory
61// - error_tests.rs -> tests/unit/error_tests.rs
62// - core_tests.rs -> tests/unit/core_tests.rs
63// - linalg_tests.rs -> tests/unit/linalg_tests.rs
64// - validation_tests.rs -> tests/validation/main.rs
65// - diagnostics_tests.rs: disabled (references unimplemented functions)
66
67// Re-export public API (always available)
68pub use core::{RegressionOutput, VifResult};
69pub use diagnostics::{
70 DiagnosticTestResult,
71 RainbowTestOutput, RainbowSingleResult, RainbowMethod,
72 WhiteTestOutput, WhiteSingleResult, WhiteMethod,
73 CooksDistanceResult,
74};
75
76// Re-export core test functions with different names to avoid WASM conflicts
77pub use diagnostics::rainbow_test as rainbow_test_core;
78pub use diagnostics::white_test as white_test_core;
79
80pub use error::{Error, Result, error_json, error_to_json};
81
82// ============================================================================
83// WASM-specific code (only compiled when "wasm" feature is enabled)
84// ============================================================================
85
86#[cfg(feature = "wasm")]
87use wasm_bindgen::prelude::*;
88
89#[cfg(feature = "wasm")]
90use std::collections::HashSet;
91
92#[cfg(feature = "wasm")]
93use serde::Serialize;
94
95#[cfg(feature = "wasm")]
96use crate::distributions::{student_t_cdf, normal_inverse_cdf};
97
98// ============================================================================
99// CSV Parsing (WASM-only)
100// ============================================================================
101
/// JSON payload produced by [`parse_csv`].
///
/// Field order is significant: it is the order in which the derived
/// `Serialize` implementation writes the keys.
#[cfg(feature = "wasm")]
#[derive(Serialize)]
struct ParsedCsv {
    /// Column names read from the CSV header row.
    headers: Vec<String>,
    /// One JSON object per accepted record, keyed by header name.
    data: Vec<serde_json::Map<String, serde_json::Value>>,
    /// Sorted names of the columns in which at least one field was stored as a number.
    numeric_columns: Vec<String>,
}
109
/// Parses CSV data and returns it as a JSON string.
///
/// The first record is treated as the header row. Every later record whose
/// field count equals the header count becomes one JSON object keyed by header
/// name; records with a different field count are skipped. Each field is
/// trimmed, then stored as a JSON number when it parses as a finite `f64` and
/// as a JSON string otherwise. A column is listed as numeric as soon as one of
/// its fields is stored as a number.
///
/// # Arguments
///
/// * `content` - CSV content as a string
///
/// # Returns
///
/// JSON string with structure:
/// ```json
/// {
///   "headers": ["col1", "col2", ...],
///   "data": [{"col1": 1.0, "col2": "text"}, ...],
///   "numeric_columns": ["col1", ...]
/// }
/// ```
/// `numeric_columns` is sorted alphabetically and contains each name once.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, if the header row
/// or a record cannot be read, or if the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn parse_csv(content: &str) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    let mut reader = csv::ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .from_reader(content.as_bytes());

    let headers: Vec<String> = match reader.headers() {
        Ok(h) => h.iter().map(String::from).collect(),
        Err(e) => return error_json(&format!("Failed to read headers: {}", e)),
    };

    let mut data = Vec::new();
    // One flag per header position; avoids cloning the header name for every
    // numeric cell just to record that the column is numeric.
    let mut is_numeric = vec![false; headers.len()];

    for result in reader.records() {
        let record = match result {
            Ok(r) => r,
            Err(e) => return error_json(&format!("Failed to parse CSV record: {}", e)),
        };

        // `flexible(true)` lets ragged rows through the reader; they are
        // dropped here so every emitted object has exactly one value per header.
        if record.len() != headers.len() {
            continue;
        }

        let mut row_map = serde_json::Map::new();
        let columns = headers.iter().zip(record.iter()).zip(is_numeric.iter_mut());

        for ((header, field), numeric) in columns {
            let field = field.trim();

            // `Number::from_f64` yields `None` for NaN and infinities, so
            // non-finite values fall through to the string branch.
            let number = field
                .parse::<f64>()
                .ok()
                .and_then(serde_json::Number::from_f64);

            let value = match number {
                Some(n) => {
                    *numeric = true;
                    serde_json::Value::Number(n)
                }
                None => serde_json::Value::String(field.to_string()),
            };
            row_map.insert(header.clone(), value);
        }
        data.push(row_map);
    }

    let mut numeric_columns: Vec<String> = headers
        .iter()
        .zip(is_numeric.iter())
        .filter_map(|(name, &flag)| if flag { Some(name.clone()) } else { None })
        .collect();
    numeric_columns.sort();
    // Repeated header names must be reported once.
    numeric_columns.dedup();

    let output = ParsedCsv {
        headers,
        data,
        numeric_columns,
    };

    serde_json::to_string(&output).unwrap_or_else(|_| error_json("Failed to serialize CSV output"))
}
203
204// ============================================================================
205// OLS Regression WASM Wrapper
206// ============================================================================
207
/// Runs an OLS regression from JSON-encoded inputs (WASM entry point).
///
/// Inputs and output are JSON strings so the function can be called directly
/// from JavaScript. The heavy lifting is delegated to `core::ols_regression`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values: `[1.0, 2.0, 3.0]`
/// * `x_vars_json` - JSON array of predictor arrays: `[[1.0, 2.0], [0.5, 1.0]]`
/// * `variable_names` - JSON array of variable names: `["Intercept", "X1", "X2"]`.
///   If this cannot be parsed, the single name `"Intercept"` is used instead
///   of reporting an error.
///
/// # Returns
///
/// JSON string holding the serialized regression output (coefficients,
/// standard errors, t-statistics, p-values, R², F-statistic, residuals,
/// leverage, VIF, etc.).
///
/// # Errors
///
/// Returns a JSON error object if:
/// - the domain check fails
/// - `y_json` or `x_vars_json` is not valid JSON of the expected shape
/// - the core regression fails (e.g. insufficient data or a singular matrix)
/// - the output cannot be serialized
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn ols_regression(y_json: &str, x_vars_json: &str, variable_names: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Every failure is reduced to its message so one exit point formats it.
    let fit = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;
        // Unparsable names are tolerated: fall back to a lone intercept label.
        let labels: Vec<String> = serde_json::from_str(variable_names)
            .unwrap_or_else(|_| vec!["Intercept".to_string()]);

        let output =
            core::ols_regression(&response, &predictors, &labels).map_err(|e| e.to_string())?;
        serde_json::to_string(&output).map_err(|_| "Failed to serialize output".to_string())
    };

    fit().unwrap_or_else(|message| error_json(&message))
}
266
267// ============================================================================
268// Diagnostic Tests WASM Wrappers
269// ============================================================================
270
/// Runs the Rainbow linearity test from JSON-encoded inputs (WASM entry point).
///
/// The Rainbow test checks whether the relationship between predictors and
/// response is linear; a significant p-value suggests non-linearity.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `fraction` - Fraction of data to use in the central subset (0.0 to 1.0, typically 0.5)
/// * `method` - `"python"` or `"both"` (case-insensitive); any other value selects the R method
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn rainbow_test(y_json: &str, x_vars_json: &str, fraction: f64, method: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        // Unrecognised method names deliberately fall back to the R variant.
        let requested = method.to_lowercase();
        let variant = if requested == "python" {
            diagnostics::RainbowMethod::Python
        } else if requested == "both" {
            diagnostics::RainbowMethod::Both
        } else {
            diagnostics::RainbowMethod::R
        };

        let outcome = diagnostics::rainbow_test(&response, &predictors, fraction, variant)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Rainbow test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
325
/// Runs the Harvey-Collier linearity test from JSON-encoded inputs (WASM entry point).
///
/// The Harvey-Collier test checks whether the residuals exhibit a linear
/// trend, which would indicate a misspecified functional form. A significant
/// p-value suggests non-linearity.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn harvey_collier_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome = diagnostics::harvey_collier_test(&response, &predictors)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Harvey-Collier test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
370
/// Runs the Breusch-Pagan heteroscedasticity test from JSON-encoded inputs
/// (WASM entry point).
///
/// The Breusch-Pagan test checks whether the variance of residuals is
/// constant across the range of predicted values (homoscedasticity).
/// A significant p-value suggests heteroscedasticity.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn breusch_pagan_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome = diagnostics::breusch_pagan_test(&response, &predictors)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Breusch-Pagan test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
415
/// Runs the White heteroscedasticity test from JSON-encoded inputs (WASM entry point).
///
/// The White test is a general test for heteroscedasticity that does not
/// assume a specific form of it. A significant p-value suggests that the
/// error variance is not constant.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `method` - `"python"` or `"both"` (case-insensitive); any other value selects the R method
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn white_test(y_json: &str, x_vars_json: &str, method: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        // Unrecognised method names deliberately fall back to the R variant.
        let requested = method.to_lowercase();
        let variant = if requested == "python" {
            diagnostics::WhiteMethod::Python
        } else if requested == "both" {
            diagnostics::WhiteMethod::Both
        } else {
            diagnostics::WhiteMethod::R
        };

        let outcome = diagnostics::white_test(&response, &predictors, variant)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize White test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
469
/// Runs the R-flavoured White heteroscedasticity test from JSON-encoded inputs
/// (WASM entry point).
///
/// Intended to match R's `skedastic::white()`: standard QR decomposition and
/// an auxiliary matrix made of the intercept, X and X² only (no cross-products).
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn r_white_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome =
            diagnostics::r_white_method(&response, &predictors).map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize R White test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
514
/// Runs the Python-flavoured White heteroscedasticity test from JSON-encoded
/// inputs (WASM entry point).
///
/// Intended to match `statsmodels.stats.diagnostic.het_white()`: LINPACK QR
/// decomposition with column pivoting and an auxiliary matrix made of the
/// intercept, X, X² and cross-products.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn python_white_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome = diagnostics::python_white_method(&response, &predictors)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Python White test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
559
/// Runs the Jarque-Bera normality test from JSON-encoded inputs (WASM entry point).
///
/// The Jarque-Bera test examines skewness and kurtosis to check whether the
/// residuals are normally distributed. A significant p-value suggests that
/// the residuals deviate from normality.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing test statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn jarque_bera_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome =
            diagnostics::jarque_bera_test(&response, &predictors).map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Jarque-Bera test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
604
605// ============================================================================
606// Durbin-Watson Test (WASM wrapper)
607// ============================================================================
608
/// Runs the Durbin-Watson autocorrelation test from JSON-encoded inputs
/// (WASM entry point).
///
/// The Durbin-Watson statistic measures autocorrelation in the residuals:
/// values near 2 indicate none, values near 0 suggest positive
/// autocorrelation, and values near 4 suggest negative autocorrelation.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing the DW statistic and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn durbin_watson_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome = diagnostics::durbin_watson_test(&response, &predictors)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Durbin-Watson test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
653
654// ============================================================================
655// Shapiro-Wilk Test (WASM wrapper)
656// ============================================================================
657
/// Runs the Shapiro-Wilk normality test from JSON-encoded inputs (WASM entry point).
///
/// The Shapiro-Wilk test is a powerful test for normality, especially for
/// small to moderate sample sizes (3 ≤ n ≤ 5000). Its null hypothesis is that
/// the residuals are normally distributed.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing the W statistic (ranges from 0 to 1), p-value,
/// and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn shapiro_wilk_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome =
            diagnostics::shapiro_wilk_test(&response, &predictors).map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Shapiro-Wilk test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
703
/// Runs the Anderson-Darling normality test from JSON-encoded inputs
/// (WASM entry point).
///
/// The Anderson-Darling test compares the empirical distribution of the
/// residuals with the expected normal distribution and is particularly
/// sensitive to deviations in the tails. A significant p-value suggests that
/// the residuals deviate from normality.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing the A² statistic, p-value, and interpretation.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn anderson_darling_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome = diagnostics::anderson_darling_test(&response, &predictors)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Anderson-Darling test result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
749
750// ============================================================================
751// Cook's Distance (WASM wrapper)
752// ============================================================================
753
/// Computes Cook's distance from JSON-encoded inputs (WASM entry point).
///
/// Cook's distance measures how much each observation influences the
/// regression model by comparing coefficient estimates with and without that
/// observation. It is an influence measure, not a hypothesis test, so no
/// p-value is involved.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing:
/// - Vector of Cook's distances (one per observation)
/// - Thresholds for identifying influential observations
/// - Indices of potentially influential observations
/// - Interpretation and guidance
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the computation itself fails, or the result cannot be serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn cooks_distance_test(y_json: &str, x_vars_json: &str) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    let run = || -> std::result::Result<String, String> {
        let response: Vec<f64> =
            serde_json::from_str(y_json).map_err(|e| format!("Failed to parse y: {}", e))?;
        let predictors: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| format!("Failed to parse x_vars: {}", e))?;

        let outcome = diagnostics::cooks_distance_test(&response, &predictors)
            .map_err(|e| e.to_string())?;
        serde_json::to_string(&outcome)
            .map_err(|_| "Failed to serialize Cook's distance result".to_string())
    };

    run().unwrap_or_else(|message| error_json(&message))
}
802
803// ============================================================================
804// Statistical Utility Functions (WASM wrappers)
805// ============================================================================
806
/// Evaluates the cumulative distribution function of Student's t-distribution.
///
/// Gives P(T ≤ t) for a t-distribution with `df` degrees of freedom by
/// forwarding to `student_t_cdf`.
///
/// # Arguments
///
/// * `t` - t-statistic value
/// * `df` - Degrees of freedom
///
/// # Returns
///
/// The CDF value, or `NaN` if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_t_cdf(t: f64, df: f64) -> f64 {
    match check_domain() {
        Ok(()) => student_t_cdf(t, df),
        // A numeric return type has no room for an error object.
        Err(_) => f64::NAN,
    }
}
828
/// Looks up the critical t-value for a significance level.
///
/// Forwards to `core::t_critical_quantile(df, alpha)`; note that the argument
/// order of this wrapper (`alpha`, `df`) is the reverse of the core function.
/// The value is meant for two-tailed use, i.e. the right-tail area is alpha/2.
///
/// # Arguments
///
/// * `alpha` - Significance level (typically 0.05 for 95% confidence)
/// * `df` - Degrees of freedom
///
/// # Returns
///
/// The critical t-value, or `NaN` if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_t_critical(alpha: f64, df: f64) -> f64 {
    match check_domain() {
        Ok(()) => core::t_critical_quantile(df, alpha),
        // A numeric return type has no room for an error object.
        Err(_) => f64::NAN,
    }
}
851
/// Evaluates the inverse standard normal CDF (probit function).
///
/// Gives the z-score such that P(Z ≤ z) = p for a standard normal
/// distribution by forwarding to `normal_inverse_cdf`.
///
/// # Arguments
///
/// * `p` - Probability (0 < p < 1)
///
/// # Returns
///
/// The z-score, or `NaN` if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_normal_inverse(p: f64) -> f64 {
    match check_domain() {
        Ok(()) => normal_inverse_cdf(p),
        // A numeric return type has no room for an error object.
        Err(_) => f64::NAN,
    }
}
872
873// ============================================================================
874// Domain Check (WASM-only)
875// ============================================================================
876//
877// By default, all domains are allowed. To enable domain restriction, set the
878// LINREG_DOMAIN_RESTRICT environment variable at build time:
879//
880// LINREG_DOMAIN_RESTRICT=example.com,yoursite.com wasm-pack build
881//
882// Example for jesse-anderson.net:
883// LINREG_DOMAIN_RESTRICT=jesse-anderson.net,tools.jesse-anderson.net,localhost,127.0.0.1 wasm-pack build
884//
885// This allows downstream users to use the library without modification while
886// still providing domain restriction as an opt-in security feature.
887
/// Verifies that the page hosting the WASM module is on the build-time allow-list.
///
/// The allow-list is the comma-separated value of the `LINREG_DOMAIN_RESTRICT`
/// environment variable captured at compile time. When the variable is unset
/// or empty, every domain is accepted. Otherwise the browser's
/// `window.location.hostname` must equal one of the entries exactly.
///
/// Entries are trimmed, and entries that are empty after trimming are ignored.
/// Without that, a stray comma (`"example.com,"`) would create an empty entry
/// that matches the empty hostname reported for pages such as `file://` URLs.
///
/// # Errors
///
/// Returns `Error::DomainCheck` when restriction is enabled and there is no
/// `window`, the hostname cannot be read, or the hostname is not on the list.
#[cfg(feature = "wasm")]
fn check_domain() -> Result<()> {
    let domains = match option_env!("LINREG_DOMAIN_RESTRICT") {
        Some(domains) if !domains.is_empty() => domains,
        // No restriction configured - allow all domains.
        _ => return Ok(()),
    };

    let window = web_sys::window()
        .ok_or_else(|| Error::DomainCheck("No window found".to_string()))?;
    let hostname = window
        .location()
        .hostname()
        .map_err(|_| Error::DomainCheck("No hostname found".to_string()))?;

    let is_allowed = domains
        .split(',')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .any(|entry| entry == hostname.as_str());

    if is_allowed {
        Ok(())
    } else {
        Err(Error::DomainCheck(format!(
            "Unauthorized domain: {}. Allowed: {}",
            hostname, domains
        )))
    }
}
917
918// ============================================================================
// Test Functions (native regression self-check and its WASM wrappers)
920// ============================================================================
921
#[cfg(test)]
mod tests {
    use super::*;

    /// Guards the housing regression against numerical drift.
    ///
    /// `test_housing_regression_native` reports coefficient mismatches as
    /// `Ok` with `"status": "FAIL"`, so checking for `Err` alone would let a
    /// numerical regression pass unnoticed. The report status is asserted too.
    #[test]
    fn verify_housing_regression_integrity() {
        let report = match test_housing_regression_native() {
            Ok(report) => report,
            Err(e) => panic!("Regression test failed: {}", e),
        };

        let parsed: serde_json::Value = serde_json::from_str(&report)
            .expect("housing regression report should be valid JSON");

        assert_eq!(
            parsed["status"], "PASS",
            "Regression test reported mismatches: {}",
            report
        );
    }
}
934
/// Smoke test confirming that the WASM module loaded and can be called.
///
/// # Returns
///
/// The fixed message `"Rust WASM is working!"`.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn test() -> String {
    match check_domain() {
        Ok(()) => String::from("Rust WASM is working!"),
        Err(denied) => error_to_json(&denied),
    }
}
950
/// Reports the library version.
///
/// # Returns
///
/// The Cargo package version captured at compile time (e.g., "0.1.0").
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn get_version() -> String {
    match check_domain() {
        Ok(()) => String::from(env!("CARGO_PKG_VERSION")),
        Err(denied) => error_to_json(&denied),
    }
}
966
/// Test function for t-critical value computation.
///
/// Evaluates `core::t_critical_quantile(df, alpha)` and echoes the inputs
/// alongside the result.
///
/// # Returns
///
/// JSON object `{"df": ..., "alpha": ..., "t_critical": ...}`. Any value that
/// is NaN or infinite is written as `null`, because JSON has no literal for
/// non-finite numbers and the output must stay parseable.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn test_t_critical(df: f64, alpha: f64) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    // Finite values keep the exact `Display` formatting used previously.
    let json_number = |value: f64| {
        if value.is_finite() {
            value.to_string()
        } else {
            "null".to_string()
        }
    };

    let t_crit = core::t_critical_quantile(df, alpha);
    format!(
        r#"{{"df": {}, "alpha": {}, "t_critical": {}}}"#,
        json_number(df),
        json_number(alpha),
        json_number(t_crit)
    )
}
983
/// Test function for confidence interval computation.
///
/// Builds the interval `coef ± t_crit * se`, where `t_crit` comes from
/// `core::t_critical_quantile(df, alpha)`.
///
/// # Returns
///
/// JSON object `{"lower": ..., "upper": ...}`. A bound that is NaN or
/// infinite is written as `null`, because JSON has no literal for non-finite
/// numbers and the output must stay parseable.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn test_ci(coef: f64, se: f64, df: f64, alpha: f64) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    // Finite values keep the exact `Display` formatting used previously.
    let json_number = |value: f64| {
        if value.is_finite() {
            value.to_string()
        } else {
            "null".to_string()
        }
    };

    let t_crit = core::t_critical_quantile(df, alpha);
    format!(
        r#"{{"lower": {}, "upper": {}}}"#,
        json_number(coef - t_crit * se),
        json_number(coef + t_crit * se)
    )
}
1000
/// Test function for R accuracy validation.
///
/// Evaluates two fixed probes so the caller can compare them against R:
/// the two-tailed p-value for t = 1.6717 with 21 degrees of freedom, and the
/// critical t-value for 21 degrees of freedom at alpha = 0.05.
///
/// # Returns
///
/// JSON object `{"two_tail_p": ..., "qt_975": ...}`.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn test_r_accuracy() -> String {
    match check_domain() {
        Err(denied) => error_to_json(&denied),
        Ok(()) => {
            let two_tail_p = core::two_tailed_p_value(1.6717, 21.0);
            let qt_975 = core::t_critical_quantile(21.0, 0.05);
            format!(
                r#"{{"two_tail_p": {}, "qt_975": {}}}"#,
                two_tail_p, qt_975
            )
        }
    }
}
1020
/// Test function for regression validation against R reference values.
///
/// Thin WASM wrapper around `test_housing_regression_native`, which fits a
/// housing dataset and compares the result with reference output from R's `lm()`.
///
/// # Returns
///
/// The JSON report produced by the native check (`"status"` is `"PASS"` or
/// `"FAIL"`), or `{"status": "ERROR", "error": ...}` when the native check
/// itself returns an error.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn test_housing_regression() -> String {
    match check_domain() {
        Err(denied) => error_to_json(&denied),
        Ok(()) => test_housing_regression_native().unwrap_or_else(|e| {
            serde_json::json!({ "status": "ERROR", "error": e.to_string() }).to_string()
        }),
    }
}
1041
1042// Native Rust test function (works without WASM feature)
1043fn test_housing_regression_native() -> Result<String> {
1044 let y = vec![
1045 245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1,
1046 445.8, 167.9, 367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9,
1047 223.4, 312.5, 156.8, 423.7, 267.9
1048 ];
1049
1050 let square_feet = vec![
1051 1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
1052 2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
1053 1250.0, 1700.0, 850.0, 2350.0, 1400.0
1054 ];
1055 let bedrooms = vec![
1056 3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0,
1057 4.0, 2.0, 4.0, 3.0, 2.0, 5.0, 3.0, 4.0, 2.0, 4.0,
1058 3.0, 3.0, 2.0, 4.0, 3.0
1059 ];
1060 let age = vec![
1061 15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0,
1062 3.0, 30.0, 6.0, 14.0, 22.0, 1.0, 16.0, 9.0, 28.0, 4.0,
1063 19.0, 11.0, 35.0, 3.0, 13.0
1064 ];
1065
1066 let x_vars = vec![square_feet, bedrooms, age];
1067 let names = vec!["Intercept".to_string(), "Square_Feet".to_string(), "Bedrooms".to_string(), "Age".to_string()];
1068
1069 let result = core::ols_regression(&y, &x_vars, &names)?;
1070
1071 // Check against R results
1072 let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
1073 let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];
1074
1075 let tolerance = 1e-4;
1076 let mut mismatches = vec![];
1077
1078 for i in 0..4 {
1079 if (result.coefficients[i] - expected_coeffs[i]).abs() > tolerance {
1080 mismatches.push(format!("coeff[{}] differs: got {}, expected {}", i, result.coefficients[i], expected_coeffs[i]));
1081 }
1082 if (result.std_errors[i] - expected_std_errs[i]).abs() > tolerance {
1083 mismatches.push(format!("std_err[{}] differs: got {}, expected {}", i, result.std_errors[i], expected_std_errs[i]));
1084 }
1085 }
1086
1087 if mismatches.is_empty() {
1088 Ok(serde_json::json!({ "status": "PASS" }).to_string())
1089 } else {
1090 Ok(serde_json::json!({ "status": "FAIL", "mismatches": mismatches }).to_string())
1091 }
1092}