Skip to main content

so_utils/
validation.rs

1//! Data validation utilities
2//!
3//! This module provides functions for checking data quality and validity
4//! before statistical analysis.
5
6use ndarray::{Array1, Array2};
7use so_core::error::{Error, Result};
8
9/// Check if array contains any NaN values
10pub fn has_nan(arr: &Array1<f64>) -> bool {
11    arr.iter().any(|&x| x.is_nan())
12}
13
14/// Check if array contains any infinite values
15pub fn has_inf(arr: &Array1<f64>) -> bool {
16    arr.iter().any(|&x| x.is_infinite())
17}
18
19/// Check if array contains any NaN or infinite values
20pub fn is_finite_array(arr: &Array1<f64>) -> bool {
21    arr.iter().all(|&x| x.is_finite())
22}
23
24/// Validate data for statistical analysis
25pub fn validate_data(arr: &Array1<f64>) -> Result<()> {
26    if has_nan(arr) {
27        return Err(Error::DataError("Data contains NaN values".to_string()));
28    }
29    if has_inf(arr) {
30        return Err(Error::DataError(
31            "Data contains infinite values".to_string(),
32        ));
33    }
34    Ok(())
35}
36
37/// Check if 2D array has consistent dimensions
38pub fn validate_matrix(mat: &Array2<f64>) -> Result<()> {
39    if mat.shape()[0] == 0 || mat.shape()[1] == 0 {
40        return Err(Error::DimensionMismatch(
41            "Matrix has zero dimensions".to_string(),
42        ));
43    }
44    Ok(())
45}
46
47/// Check if array has at least n elements
48pub fn check_min_samples(arr: &Array1<f64>, min_samples: usize) -> Result<()> {
49    if arr.len() < min_samples {
50        return Err(Error::DataError(format!(
51            "Insufficient samples: {} < {}",
52            arr.len(),
53            min_samples
54        )));
55    }
56    Ok(())
57}