linreg_core/
lib.rs

1//! Linear Regression Library with WASM bindings.
2//!
3//! This library provides Ordinary Least Squares (OLS) regression and
4//! statistical diagnostic tests. When the `wasm` feature is enabled,
5//! it exposes JavaScript bindings for use in web browsers.
6//!
7//! # Features
8//!
9//! - OLS regression with QR decomposition for numerical stability
10//! - Comprehensive diagnostic tests (Rainbow, Harvey-Collier, Breusch-Pagan, White, Jarque-Bera, Durbin-Watson, Shapiro-Wilk, Anderson-Darling)
11//! - Variance Inflation Factor (VIF) analysis
12//! - Custom implementations of all linear algebra and statistical functions
13//!
14//! # Module Structure
15//!
16//! - [`core`] - OLS regression implementation
17//! - [`diagnostics`] - Statistical diagnostic tests
18//! - [`distributions`] - Statistical distributions (t, F, chi-squared, normal)
19//! - [`linalg`] - Matrix operations and QR decomposition
20//! - [`error`] - Error types
21//!
22//! # WASM API
23//!
24//! When the `wasm` feature is enabled, the library exposes JavaScript bindings that
25//! accept and return JSON strings for easy integration. By default, all domains are
26//! allowed. To restrict usage to specific domains, set the `LINREG_DOMAIN_RESTRICT`
27//! environment variable at build time:
28//!
29//! ```bash
30//! LINREG_DOMAIN_RESTRICT=example.com,mysite.com wasm-pack build
31//! ```
32//!
33//! # Example (Native Rust)
34//!
35//! ```
36//! use linreg_core::core::ols_regression;
37//! use linreg_core::Error;
38//!
39//! let y = vec![2.5, 3.7, 4.2, 5.1, 6.3, 7.0];
40//! let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
41//! let x2 = vec![2.0, 4.0, 5.0, 4.0, 3.0, 2.0];
42//! let names = vec![
43//!     "Intercept".to_string(),
44//!     "Temperature".to_string(),
45//!     "Pressure".to_string(),
46//! ];
47//!
48//! let result = ols_regression(&y, &[x1, x2], &names)?;
49//! println!("R²: {}", result.r_squared);
50//! # Ok::<(), Error>(())
51//! ```
52
53// Import core modules (always available)
54pub mod core;
55pub mod diagnostics;
56pub mod distributions;
57pub mod linalg;
58pub mod error;
59
60// Unit tests are now in tests/unit/ directory
61// - error_tests.rs -> tests/unit/error_tests.rs
62// - core_tests.rs -> tests/unit/core_tests.rs
63// - linalg_tests.rs -> tests/unit/linalg_tests.rs
64// - validation_tests.rs -> tests/validation/main.rs
65// - diagnostics_tests.rs: disabled (references unimplemented functions)
66
67// Re-export public API (always available)
68pub use core::{RegressionOutput, VifResult};
69pub use diagnostics::{
70    DiagnosticTestResult,
71    RainbowTestOutput, RainbowSingleResult, RainbowMethod,
72    WhiteTestOutput, WhiteSingleResult, WhiteMethod,
73    CooksDistanceResult,
74};
75
76// Re-export core test functions with different names to avoid WASM conflicts
77pub use diagnostics::rainbow_test as rainbow_test_core;
78pub use diagnostics::white_test as white_test_core;
79
80pub use error::{Error, Result, error_json, error_to_json};
81
82// ============================================================================
83// WASM-specific code (only compiled when "wasm" feature is enabled)
84// ============================================================================
85
86#[cfg(feature = "wasm")]
87use wasm_bindgen::prelude::*;
88
89#[cfg(feature = "wasm")]
90use std::collections::HashSet;
91
92#[cfg(feature = "wasm")]
93use serde::Serialize;
94
95#[cfg(feature = "wasm")]
96use crate::distributions::{student_t_cdf, normal_inverse_cdf};
97
98// ============================================================================
99// CSV Parsing (WASM-only)
100// ============================================================================
101
/// JSON payload produced by `parse_csv`.
///
/// Serialized through `serde`, so the field names below are the keys of the
/// resulting JSON object.
#[cfg(feature = "wasm")]
#[derive(Serialize)]
struct ParsedCsv {
    /// Column names read from the CSV header row, in file order.
    headers: Vec<String>,
    /// One JSON object per retained row, keyed by header name. Cells that
    /// parse as finite numbers are stored as JSON numbers, all others as
    /// trimmed strings.
    data: Vec<serde_json::Map<String, serde_json::Value>>,
    /// Alphabetically sorted names of the columns in which at least one cell
    /// parsed as a finite number.
    numeric_columns: Vec<String>,
}
109
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Parses CSV data and returns it as a JSON string.
///
/// The first record is treated as the header row. Every following record is
/// turned into a JSON object keyed by header name; each cell is trimmed and
/// stored as a JSON number when it parses as a finite `f64`, otherwise as a
/// string.
///
/// Records whose field count differs from the header count are skipped
/// silently (the reader is configured as `flexible`, so they do not raise a
/// parse error).
///
/// A column is reported in `numeric_columns` as soon as **any** of its cells
/// parses as a finite number; the list is sorted alphabetically.
///
/// # Arguments
///
/// * `content` - CSV content as a string
///
/// # Returns
///
/// JSON string with structure:
/// ```json
/// {
///   "headers": ["col1", "col2", ...],
///   "data": [{"col1": 1.0, "col2": "text"}, ...],
///   "numeric_columns": ["col1", ...]
/// }
/// ```
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, the header row
/// cannot be read, a record cannot be parsed, or the output cannot be
/// serialized.
pub fn parse_csv(content: &str) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }

    let mut reader = csv::ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .from_reader(content.as_bytes());

    let headers: Vec<String> = match reader.headers() {
        Ok(h) => h.iter().map(|s| s.to_string()).collect(),
        Err(e) => return error_json(&format!("Failed to read headers: {}", e)),
    };

    let mut data = Vec::new();
    let mut numeric_col_set = HashSet::new();

    for result in reader.records() {
        let record = match result {
            Ok(r) => r,
            Err(e) => return error_json(&format!("Failed to parse CSV record: {}", e)),
        };

        // Ragged rows get past the flexible reader; drop them here so every
        // emitted object has exactly one entry per header.
        if record.len() != headers.len() {
            continue;
        }

        let mut row_map = serde_json::Map::new();

        // Lengths are equal at this point, so `zip` visits every cell.
        for (header, field) in headers.iter().zip(record.iter()) {
            let field = field.trim();

            // `Number::from_f64` yields `None` for NaN and infinities, which
            // covers the finiteness check without an `unwrap`.
            let number = field
                .parse::<f64>()
                .ok()
                .and_then(serde_json::Number::from_f64);

            let value = match number {
                Some(n) => {
                    numeric_col_set.insert(header.clone());
                    serde_json::Value::Number(n)
                }
                // Anything that is not a finite number is kept as text.
                None => serde_json::Value::String(field.to_string()),
            };
            row_map.insert(header.clone(), value);
        }
        data.push(row_map);
    }

    // Hash-set iteration order is unspecified; sort for a stable result.
    let mut numeric_columns: Vec<String> = numeric_col_set.into_iter().collect();
    numeric_columns.sort();

    let output = ParsedCsv {
        headers,
        data,
        numeric_columns,
    };

    serde_json::to_string(&output).unwrap_or_else(|_| error_json("Failed to serialize CSV output"))
}
203
204// ============================================================================
205// OLS Regression WASM Wrapper
206// ============================================================================
207
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Runs an OLS regression from JSON-encoded inputs (WASM entry point).
///
/// Inputs and output are JSON strings so the function can be called directly
/// from JavaScript. The actual fit is delegated to `core::ols_regression`,
/// and its output is serialized as-is.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values: `[1.0, 2.0, 3.0]`
/// * `x_vars_json` - JSON array of predictor arrays: `[[1.0, 2.0], [0.5, 1.0]]`
/// * `variable_names` - JSON array of variable names: `["Intercept", "X1", "X2"]`.
///   If this cannot be parsed, the single name `"Intercept"` is used instead
///   of reporting an error.
///
/// # Returns
///
/// JSON string containing the serialized regression output.
///
/// # Errors
///
/// Returns a JSON error object if:
/// - the domain check fails
/// - `y_json` or `x_vars_json` is not valid JSON of the expected shape
/// - the core regression reports an error
/// - the result cannot be serialized
pub fn ols_regression(
    y_json: &str,
    x_vars_json: &str,
    variable_names: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: `Ok` for the result, `Err` for an
    // already-encoded error object.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;
        // Best-effort: unparseable names degrade to a lone intercept label.
        let names: Vec<String> = serde_json::from_str(variable_names)
            .unwrap_or_else(|_| vec!["Intercept".to_string()]);

        let fit = core::ols_regression(&y, &x_vars, &names)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&fit)
            .unwrap_or_else(|_| error_json("Failed to serialize output")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
266
267// ============================================================================
268// Diagnostic Tests WASM Wrappers
269// ============================================================================
270
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Runs the Rainbow linearity test from JSON-encoded inputs (WASM entry point).
///
/// The Rainbow test checks whether the relationship between predictors and
/// response is linear; a significant p-value suggests non-linearity. The
/// computation is delegated to `diagnostics::rainbow_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `fraction` - Fraction of data to use in the central subset (0.0 to 1.0, typically 0.5)
/// * `method` - `"r"`, `"python"`, or `"both"` (case-insensitive); any other
///   value selects the R variant
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
pub fn rainbow_test(
    y_json: &str,
    x_vars_json: &str,
    fraction: f64,
    method: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        // Unrecognised method names fall through to the R implementation.
        let variant = match method.to_lowercase().as_str() {
            "both" => diagnostics::RainbowMethod::Both,
            "python" => diagnostics::RainbowMethod::Python,
            _ => diagnostics::RainbowMethod::R,
        };

        let report = diagnostics::rainbow_test(&y, &x_vars, fraction, variant)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Rainbow test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
325
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Runs the Harvey-Collier linearity test from JSON-encoded inputs (WASM entry point).
///
/// The Harvey-Collier test checks whether the residuals exhibit a linear
/// trend, which would indicate a misspecified functional form; a significant
/// p-value suggests non-linearity. The computation is delegated to
/// `diagnostics::harvey_collier_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
pub fn harvey_collier_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::harvey_collier_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Harvey-Collier test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
370
/// Runs the Breusch-Pagan heteroscedasticity test from JSON-encoded inputs
/// (WASM entry point).
///
/// The Breusch-Pagan test checks whether the variance of the residuals is
/// constant across the range of predicted values (homoscedasticity); a
/// significant p-value suggests heteroscedasticity. The computation is
/// delegated to `diagnostics::breusch_pagan_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn breusch_pagan_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::breusch_pagan_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Breusch-Pagan test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
415
/// Runs the White heteroscedasticity test from JSON-encoded inputs (WASM entry point).
///
/// The White test is a general test for heteroscedasticity that does not
/// assume a specific form of it; a significant p-value suggests that the
/// error variance is not constant. The computation is delegated to
/// `diagnostics::white_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
/// * `method` - `"r"`, `"python"`, or `"both"` (case-insensitive); any other
///   value selects the R variant
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn white_test(
    y_json: &str,
    x_vars_json: &str,
    method: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        // Unrecognised method names fall through to the R implementation.
        let variant = match method.to_lowercase().as_str() {
            "both" => diagnostics::WhiteMethod::Both,
            "python" => diagnostics::WhiteMethod::Python,
            _ => diagnostics::WhiteMethod::R,
        };

        let report = diagnostics::white_test(&y, &x_vars, variant)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize White test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
469
/// Runs the R-flavoured White heteroscedasticity test from JSON-encoded
/// inputs (WASM entry point).
///
/// Intended to match R's `skedastic::white()`: standard QR decomposition and
/// an auxiliary matrix made of intercept, X and X² only (no cross-products).
/// The computation is delegated to `diagnostics::r_white_method`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn r_white_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::r_white_method(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize R White test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
514
/// Runs the Python-flavoured White heteroscedasticity test from JSON-encoded
/// inputs (WASM entry point).
///
/// Intended to match `statsmodels.stats.diagnostic.het_white()`: LINPACK QR
/// decomposition with column pivoting and an auxiliary matrix made of
/// intercept, X, X² and cross-products. The computation is delegated to
/// `diagnostics::python_white_method`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays (each array is a column)
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn python_white_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::python_white_method(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Python White test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
559
/// Runs the Jarque-Bera normality test from JSON-encoded inputs (WASM entry point).
///
/// The Jarque-Bera test checks whether the residuals are normally distributed
/// by examining skewness and kurtosis; a significant p-value suggests a
/// departure from normality. The computation is delegated to
/// `diagnostics::jarque_bera_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn jarque_bera_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::jarque_bera_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Jarque-Bera test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
604
605// ============================================================================
606// Durbin-Watson Test (WASM wrapper)
607// ============================================================================
608
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Runs the Durbin-Watson autocorrelation test from JSON-encoded inputs
/// (WASM entry point).
///
/// The Durbin-Watson statistic probes the residuals for autocorrelation:
/// values near 2 indicate none, values near 0 suggest positive
/// autocorrelation, and values near 4 suggest negative autocorrelation. The
/// computation is delegated to `diagnostics::durbin_watson_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string with the serialized test output.
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
pub fn durbin_watson_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::durbin_watson_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Durbin-Watson test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
653
654// ============================================================================
655// Shapiro-Wilk Test (WASM wrapper)
656// ============================================================================
657
/// Runs the Shapiro-Wilk normality test from JSON-encoded inputs (WASM entry point).
///
/// The Shapiro-Wilk test is a powerful test for normality, especially for
/// small to moderate sample sizes (3 ≤ n ≤ 5000). Its null hypothesis is that
/// the residuals are normally distributed. The computation is delegated to
/// `diagnostics::shapiro_wilk_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string with the serialized test output (W statistic, p-value and
/// interpretation).
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
#[cfg(feature = "wasm")]
#[wasm_bindgen]
pub fn shapiro_wilk_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::shapiro_wilk_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Shapiro-Wilk test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
703
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Runs the Anderson-Darling normality test from JSON-encoded inputs
/// (WASM entry point).
///
/// The Anderson-Darling test compares the empirical distribution of the
/// residuals with the expected normal distribution and is particularly
/// sensitive to deviations in the tails; a significant p-value suggests a
/// departure from normality. The computation is delegated to
/// `diagnostics::anderson_darling_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string with the serialized test output (A² statistic, p-value and
/// interpretation).
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the test itself reports an error, or the result cannot be
/// serialized.
pub fn anderson_darling_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::anderson_darling_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Anderson-Darling test result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
749
750// ============================================================================
751// Cook's Distance (WASM wrapper)
752// ============================================================================
753
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Computes Cook's distance from JSON-encoded inputs (WASM entry point).
///
/// Cook's distance measures how strongly each observation influences the
/// fitted model by comparing coefficient estimates with and without that
/// observation. It is an influence measure, not a hypothesis test, so no
/// p-value is involved. The computation is delegated to
/// `diagnostics::cooks_distance_test`.
///
/// # Arguments
///
/// * `y_json` - JSON array of response variable values
/// * `x_vars_json` - JSON array of predictor arrays
///
/// # Returns
///
/// JSON string containing:
/// - Vector of Cook's distances (one per observation)
/// - Thresholds for identifying influential observations
/// - Indices of potentially influential observations
/// - Interpretation and guidance
///
/// # Errors
///
/// Returns a JSON error object if the domain check fails, an input cannot be
/// parsed, the computation itself reports an error, or the result cannot be
/// serialized.
pub fn cooks_distance_test(
    y_json: &str,
    x_vars_json: &str,
) -> String {
    if let Err(denied) = check_domain() {
        return error_to_json(&denied);
    }

    // Both arms carry the final JSON text: result on `Ok`, error object on `Err`.
    let run = || -> std::result::Result<String, String> {
        let y: Vec<f64> = serde_json::from_str(y_json)
            .map_err(|e| error_json(&format!("Failed to parse y: {}", e)))?;
        let x_vars: Vec<Vec<f64>> = serde_json::from_str(x_vars_json)
            .map_err(|e| error_json(&format!("Failed to parse x_vars: {}", e)))?;

        let report = diagnostics::cooks_distance_test(&y, &x_vars)
            .map_err(|e| error_json(&e.to_string()))?;
        Ok(serde_json::to_string(&report)
            .unwrap_or_else(|_| error_json("Failed to serialize Cook's distance result")))
    };

    run().unwrap_or_else(|err_json| err_json)
}
802
803// ============================================================================
804// Statistical Utility Functions (WASM wrappers)
805// ============================================================================
806
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Evaluates the cumulative distribution function of Student's t-distribution.
///
/// Gives P(T ≤ t) for a t-distribution with `df` degrees of freedom by
/// forwarding to `student_t_cdf`.
///
/// # Arguments
///
/// * `t` - t-statistic value
/// * `df` - Degrees of freedom
///
/// # Returns
///
/// The CDF value, or `NaN` if the domain check fails.
pub fn get_t_cdf(t: f64, df: f64) -> f64 {
    match check_domain() {
        Ok(()) => student_t_cdf(t, df),
        // A numeric return type cannot carry an error object; NaN signals refusal.
        Err(_) => f64::NAN,
    }
}
828
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Looks up the critical t-value for a significance level.
///
/// Gives the t-value for which the area under the t-distribution curve to
/// its right equals alpha/2 (two-tailed test), by forwarding to
/// `core::t_critical_quantile(df, alpha)`.
///
/// # Arguments
///
/// * `alpha` - Significance level (typically 0.05 for 95% confidence)
/// * `df` - Degrees of freedom
///
/// # Returns
///
/// The critical t-value, or `NaN` if the domain check fails.
pub fn get_t_critical(alpha: f64, df: f64) -> f64 {
    match check_domain() {
        // Note the argument order of the core helper: degrees of freedom first.
        Ok(()) => core::t_critical_quantile(df, alpha),
        Err(_) => f64::NAN,
    }
}
851
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Evaluates the inverse standard normal CDF (probit function).
///
/// Gives the z-score for which P(Z ≤ z) = p under a standard normal
/// distribution, by forwarding to `normal_inverse_cdf`.
///
/// # Arguments
///
/// * `p` - Probability (0 < p < 1)
///
/// # Returns
///
/// The z-score, or `NaN` if the domain check fails.
pub fn get_normal_inverse(p: f64) -> f64 {
    match check_domain() {
        Ok(()) => normal_inverse_cdf(p),
        Err(_) => f64::NAN,
    }
}
872
873// ============================================================================
874// Domain Check (WASM-only)
875// ============================================================================
876//
877// By default, all domains are allowed. To enable domain restriction, set the
878// LINREG_DOMAIN_RESTRICT environment variable at build time:
879//
880//   LINREG_DOMAIN_RESTRICT=example.com,yoursite.com wasm-pack build
881//
882// Example for jesse-anderson.net:
883//   LINREG_DOMAIN_RESTRICT=jesse-anderson.net,tools.jesse-anderson.net,localhost,127.0.0.1 wasm-pack build
884//
885// This allows downstream users to use the library without modification while
886// still providing domain restriction as an opt-in security feature.
887
/// Verifies that the page hosting the module is on the build-time allow-list.
///
/// The allow-list is the comma-separated value of the `LINREG_DOMAIN_RESTRICT`
/// environment variable captured at compile time. When the variable is unset
/// or empty, every caller is accepted.
///
/// Entries are trimmed and compared with the page's `location.hostname`
/// ignoring ASCII case, because host names are case-insensitive. Empty
/// entries (for example from a trailing comma) are ignored so they cannot
/// match an empty hostname.
///
/// # Errors
///
/// Returns `Error::DomainCheck` when restriction is enabled and
/// - no `window` object is available,
/// - the hostname cannot be read, or
/// - the hostname is not on the allow-list.
#[cfg(feature = "wasm")]
fn check_domain() -> Result<()> {
    // `option_env!` is evaluated by the compiler, so the list is baked into
    // the binary and cannot be changed at run time.
    let allowed = match option_env!("LINREG_DOMAIN_RESTRICT") {
        Some(list) if !list.is_empty() => list,
        // No restriction configured - allow all domains.
        _ => return Ok(()),
    };

    // `ok_or_else` builds the error message only on the failure path.
    let window = web_sys::window()
        .ok_or_else(|| Error::DomainCheck("No window found".to_string()))?;
    let hostname = window
        .location()
        .hostname()
        .map_err(|_| Error::DomainCheck("No hostname found".to_string()))?;

    let permitted = allowed
        .split(',')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .any(|entry| entry.eq_ignore_ascii_case(&hostname));

    if permitted {
        Ok(())
    } else {
        Err(Error::DomainCheck(format!(
            "Unauthorized domain: {}. Allowed: {}",
            hostname, allowed
        )))
    }
}
917
918// ============================================================================
// Test Functions (WASM bindings and native helpers)
920// ============================================================================
921
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the native housing regression check and requires a "PASS" report.
    ///
    /// `test_housing_regression_native` reports coefficient mismatches as an
    /// `Ok` value whose JSON status is "FAIL", so checking for `Err` alone
    /// would let a wrong regression pass. The status field is asserted too.
    #[test]
    fn verify_housing_regression_integrity() {
        let report = match test_housing_regression_native() {
            Ok(report) => report,
            Err(e) => panic!("Regression test failed: {}", e),
        };

        let parsed: serde_json::Value =
            serde_json::from_str(&report).expect("regression report should be valid JSON");
        assert_eq!(
            parsed["status"].as_str(),
            Some("PASS"),
            "Regression test failed: {}",
            report
        );
    }
}
934
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Smoke test confirming that the WASM module loaded and can be called.
///
/// Returns a fixed success message when the domain check passes.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
pub fn test() -> String {
    match check_domain() {
        Ok(()) => String::from("Rust WASM is working!"),
        Err(e) => error_to_json(&e),
    }
}
950
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Reports the library version.
///
/// The value is the Cargo package version captured at compile time
/// (e.g., "0.1.0").
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
pub fn get_version() -> String {
    match check_domain() {
        Ok(()) => String::from(env!("CARGO_PKG_VERSION")),
        Err(e) => error_to_json(&e),
    }
}
966
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for t-critical value computation.
///
/// Returns a JSON object with the keys `df`, `alpha`, and `t_critical`, where
/// `t_critical` is the result of `core::t_critical_quantile(df, alpha)`.
/// Non-finite numbers (NaN or infinity) are emitted as `null` so the output is
/// always valid JSON. Key order in the output is not significant.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
pub fn test_t_critical(df: f64, alpha: f64) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }
    let t_crit = core::t_critical_quantile(df, alpha);
    // Serialize through serde_json rather than `format!`: `Display` renders
    // non-finite floats as `NaN`/`inf`, which JSON parsers reject.
    serde_json::json!({ "df": df, "alpha": alpha, "t_critical": t_crit }).to_string()
}
983
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for confidence interval computation.
///
/// Returns a JSON object with the keys `lower` and `upper`, computed as
/// `coef ∓ t * se` where `t` is `core::t_critical_quantile(df, alpha)`.
/// Non-finite bounds (NaN or infinity) are emitted as `null` so the output is
/// always valid JSON. Key order in the output is not significant.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
pub fn test_ci(coef: f64, se: f64, df: f64, alpha: f64) -> String {
    if let Err(e) = check_domain() {
        return error_to_json(&e);
    }
    let t_crit = core::t_critical_quantile(df, alpha);
    let margin = t_crit * se;
    // Serialize through serde_json rather than `format!`: `Display` renders
    // non-finite floats as `NaN`/`inf`, which JSON parsers reject.
    serde_json::json!({ "lower": coef - margin, "upper": coef + margin }).to_string()
}
1000
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function for spot-checking statistical helpers against R.
///
/// Evaluates `core::two_tailed_p_value(1.6717, 21.0)` and
/// `core::t_critical_quantile(21.0, 0.05)` and returns both in a JSON object
/// under the keys `two_tail_p` and `qt_975`, for comparison with R output.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
pub fn test_r_accuracy() -> String {
    match check_domain() {
        Err(e) => error_to_json(&e),
        Ok(()) => {
            let two_tail_p = core::two_tailed_p_value(1.6717, 21.0);
            let qt_975 = core::t_critical_quantile(21.0, 0.05);
            format!(r#"{{"two_tail_p": {}, "qt_975": {}}}"#, two_tail_p, qt_975)
        }
    }
}
1020
#[cfg(feature = "wasm")]
#[wasm_bindgen]
/// Test function validating a regression against R reference values.
///
/// Delegates to the native housing-dataset check and returns its JSON report,
/// whose status is "PASS" or "FAIL" (with details). If the regression itself
/// fails, the report has status "ERROR" and carries the error message.
///
/// # Errors
///
/// Returns a JSON error object if domain check fails.
pub fn test_housing_regression() -> String {
    match check_domain() {
        Err(e) => error_to_json(&e),
        Ok(()) => test_housing_regression_native().unwrap_or_else(|e| {
            serde_json::json!({ "status": "ERROR", "error": e.to_string() }).to_string()
        }),
    }
}
1041
1042// Native Rust test function (works without WASM feature)
1043fn test_housing_regression_native() -> Result<String> {
1044    let y = vec![
1045        245.5, 312.8, 198.4, 425.6, 278.9, 356.2, 189.5, 512.3, 234.7, 298.1,
1046        445.8, 167.9, 367.4, 289.6, 198.2, 478.5, 256.3, 334.7, 178.5, 398.9,
1047        223.4, 312.5, 156.8, 423.7, 267.9
1048    ];
1049
1050    let square_feet = vec![
1051        1200.0, 1800.0, 950.0, 2400.0, 1450.0, 2000.0, 1100.0, 2800.0, 1350.0, 1650.0,
1052        2200.0, 900.0, 1950.0, 1500.0, 1050.0, 2600.0, 1300.0, 1850.0, 1000.0, 2100.0,
1053        1250.0, 1700.0, 850.0, 2350.0, 1400.0
1054    ];
1055    let bedrooms = vec![
1056        3.0, 4.0, 2.0, 4.0, 3.0, 4.0, 2.0, 5.0, 3.0, 3.0,
1057        4.0, 2.0, 4.0, 3.0, 2.0, 5.0, 3.0, 4.0, 2.0, 4.0,
1058        3.0, 3.0, 2.0, 4.0, 3.0
1059    ];
1060    let age = vec![
1061        15.0, 10.0, 25.0, 5.0, 8.0, 12.0, 20.0, 2.0, 18.0, 7.0,
1062        3.0, 30.0, 6.0, 14.0, 22.0, 1.0, 16.0, 9.0, 28.0, 4.0,
1063        19.0, 11.0, 35.0, 3.0, 13.0
1064    ];
1065
1066    let x_vars = vec![square_feet, bedrooms, age];
1067    let names = vec!["Intercept".to_string(), "Square_Feet".to_string(), "Bedrooms".to_string(), "Age".to_string()];
1068
1069    let result = core::ols_regression(&y, &x_vars, &names)?;
1070
1071    // Check against R results
1072    let expected_coeffs = [52.1271333, 0.1613877, 0.9545492, -1.1811815];
1073    let expected_std_errs = [31.18201809, 0.01875072, 10.44400198, 0.73219949];
1074
1075    let tolerance = 1e-4;
1076    let mut mismatches = vec![];
1077
1078    for i in 0..4 {
1079        if (result.coefficients[i] - expected_coeffs[i]).abs() > tolerance {
1080            mismatches.push(format!("coeff[{}] differs: got {}, expected {}", i, result.coefficients[i], expected_coeffs[i]));
1081        }
1082        if (result.std_errors[i] - expected_std_errs[i]).abs() > tolerance {
1083            mismatches.push(format!("std_err[{}] differs: got {}, expected {}", i, result.std_errors[i], expected_std_errs[i]));
1084        }
1085    }
1086
1087    if mismatches.is_empty() {
1088        Ok(serde_json::json!({ "status": "PASS" }).to_string())
1089    } else {
1090        Ok(serde_json::json!({ "status": "FAIL", "mismatches": mismatches }).to_string())
1091    }
1092}
linreg_core/lib.rs

linreg_core/
lib.rs