Skip to main content

u_analytics/capability/
nonnormal.rs

1//! Non-normal process capability using Box-Cox transformation.
2//!
3//! When process data is non-normal (e.g., right-skewed exponential-like data),
4//! standard capability indices (Cp, Cpk) computed on raw data can be misleading.
5//! This module applies the Box-Cox power transformation to map the data to
6//! approximate normality, then computes standard capability indices on the
7//! transformed scale.
8//!
9//! # Algorithm
10//!
11//! 1. Estimate the optimal λ via maximum likelihood (`estimate_lambda`).
12//! 2. Transform the data: `y(λ)`.
13//! 3. Transform the specification limits using the same λ.
14//! 4. Compute capability indices on the transformed scale.
15//!
16//! # References
17//!
18//! - Box, G. E. P. & Cox, D. R. (1964). "An analysis of transformations."
19//!   *Journal of the Royal Statistical Society, Series B*, 26(2), 211–252.
20//! - Clements, J. A. (1989). "Process capability calculations for non-normal
21//!   distributions." *Quality Progress*, 22(9), 95–100.
22
23use std::fmt;
24
25use u_numflow::transforms::{box_cox, estimate_lambda, TransformError};
26
27use crate::capability::{CapabilityIndices, ProcessCapability};
28
29// ── Error type ────────────────────────────────────────────────────────────────
30
31/// Errors that can arise from non-normal process capability analysis.
32#[derive(Debug, Clone, PartialEq)]
33pub enum NonNormalCapabilityError {
34    /// All data values must be strictly positive for Box-Cox transformation.
35    NonPositiveData,
36    /// At least 4 data points are required for reliable capability analysis.
37    InsufficientData,
38    /// Failed to transform a specification limit (e.g., limit is not positive).
39    SpecTransformError,
40    /// Capability computation failed (e.g., all transformed data are identical).
41    CapabilityError,
42    /// At least one specification limit (USL or LSL) must be provided.
43    NoSpecLimits,
44}
45
46impl fmt::Display for NonNormalCapabilityError {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        match self {
49            NonNormalCapabilityError::NonPositiveData => {
50                write!(
51                    f,
52                    "Box-Cox requires all data values to be strictly positive"
53                )
54            }
55            NonNormalCapabilityError::InsufficientData => {
56                write!(
57                    f,
58                    "at least 4 data points are required for capability analysis"
59                )
60            }
61            NonNormalCapabilityError::SpecTransformError => {
62                write!(
63                    f,
64                    "failed to transform specification limit — limit must be positive"
65                )
66            }
67            NonNormalCapabilityError::CapabilityError => {
68                write!(
69                    f,
70                    "capability computation failed — check that data has non-zero variance"
71                )
72            }
73            NonNormalCapabilityError::NoSpecLimits => {
74                write!(
75                    f,
76                    "at least one specification limit (USL or LSL) must be provided"
77                )
78            }
79        }
80    }
81}
82
83impl std::error::Error for NonNormalCapabilityError {}
84
85impl From<TransformError> for NonNormalCapabilityError {
86    fn from(e: TransformError) -> Self {
87        match e {
88            TransformError::NonPositiveData => NonNormalCapabilityError::NonPositiveData,
89            TransformError::InsufficientData => NonNormalCapabilityError::InsufficientData,
90            TransformError::InvalidInverse => NonNormalCapabilityError::SpecTransformError,
91        }
92    }
93}
94
95// ── Result type ───────────────────────────────────────────────────────────────
96
97/// Result of a Box-Cox-based non-normal capability analysis.
98#[derive(Debug, Clone)]
99pub struct NonNormalCapabilityResult {
100    /// The estimated optimal Box-Cox transformation parameter λ.
101    ///
102    /// λ ≈ 0 corresponds to a log transform; λ = 1 is the identity (no transform);
103    /// λ = 0.5 is approximately a square-root transform.
104    pub lambda: f64,
105    /// Capability indices computed on the Box-Cox-transformed scale.
106    pub indices: CapabilityIndices,
107}
108
109// ── Public API ────────────────────────────────────────────────────────────────
110
111/// Compute process capability indices for non-normal data via Box-Cox transformation.
112///
113/// The data are first transformed to approximate normality using the optimal
114/// Box-Cox parameter λ (estimated via maximum likelihood over `[-2, 2]`).
115/// Specification limits are transformed using the same λ. Standard capability
116/// indices (Cp, Cpk, Pp, Ppk) are then computed on the transformed scale.
117///
118/// # Arguments
119///
120/// * `data` — Process observations. All values must be strictly positive.
121/// * `usl` — Upper specification limit (optional). Must be positive if provided.
122/// * `lsl` — Lower specification limit (optional). Must be positive if provided.
123///
124/// # Errors
125///
126/// Returns [`NonNormalCapabilityError`] if:
127/// - Fewer than 4 data points are provided.
128/// - Any data value is ≤ 0 (Box-Cox requires strictly positive data).
129/// - Neither `usl` nor `lsl` is provided.
130/// - A specification limit is ≤ 0 (cannot be Box-Cox transformed).
131/// - Capability computation fails (e.g., zero variance in transformed data).
132///
133/// # Examples
134///
135/// ```
136/// use u_analytics::capability::boxcox_capability;
137///
138/// // Right-skewed data
139/// let data: Vec<f64> = (1..=20).map(|i| (i as f64 * 0.3_f64).exp()).collect();
140/// let result = boxcox_capability(&data, Some(100.0), Some(1.0)).unwrap();
141/// assert!(result.lambda >= -2.0 && result.lambda <= 2.0);
142/// assert!(result.indices.ppk.is_some());
143/// ```
144pub fn boxcox_capability(
145    data: &[f64],
146    usl: Option<f64>,
147    lsl: Option<f64>,
148) -> Result<NonNormalCapabilityResult, NonNormalCapabilityError> {
149    // Validate: at least one spec limit
150    if usl.is_none() && lsl.is_none() {
151        return Err(NonNormalCapabilityError::NoSpecLimits);
152    }
153
154    // Validate: sufficient data
155    if data.len() < 4 {
156        return Err(NonNormalCapabilityError::InsufficientData);
157    }
158
159    // Validate: all data must be strictly positive
160    if data.iter().any(|&v| v <= 0.0) {
161        return Err(NonNormalCapabilityError::NonPositiveData);
162    }
163
164    // Estimate optimal λ
165    let lambda = estimate_lambda(data, -2.0, 2.0)?;
166
167    // Transform data
168    let y_t = box_cox(data, lambda)?;
169
170    // Transform spec limits (box_cox requires len >= 2, use a dummy second element)
171    let usl_t = usl
172        .map(|u| {
173            if u <= 0.0 {
174                return Err(NonNormalCapabilityError::SpecTransformError);
175            }
176            // pair with data[0] (positive) so box_cox gets len=2
177            let pair = [u, data[0]];
178            box_cox(&pair, lambda)
179                .map(|v| v[0])
180                .map_err(|_| NonNormalCapabilityError::SpecTransformError)
181        })
182        .transpose()?;
183
184    let lsl_t = lsl
185        .map(|l| {
186            if l <= 0.0 {
187                return Err(NonNormalCapabilityError::SpecTransformError);
188            }
189            let pair = [l, data[0]];
190            box_cox(&pair, lambda)
191                .map(|v| v[0])
192                .map_err(|_| NonNormalCapabilityError::SpecTransformError)
193        })
194        .transpose()?;
195
196    // Compute overall std of transformed data (for Pp/Ppk)
197    let n = y_t.len();
198    let mean_t = y_t.iter().sum::<f64>() / n as f64;
199    let overall_std_t =
200        (y_t.iter().map(|&v| (v - mean_t).powi(2)).sum::<f64>() / (n - 1) as f64).sqrt();
201
202    // Build ProcessCapability on transformed scale
203    // ProcessCapability::new validates usl > lsl when both present
204    let spec = ProcessCapability::new(usl_t, lsl_t)
205        .map_err(|_| NonNormalCapabilityError::CapabilityError)?;
206
207    // compute() uses overall_std_t as sigma_within (no rational subgrouping)
208    let indices = spec
209        .compute(&y_t, overall_std_t)
210        .ok_or(NonNormalCapabilityError::CapabilityError)?;
211
212    Ok(NonNormalCapabilityResult { lambda, indices })
213}
214
215// ── Tests ─────────────────────────────────────────────────────────────────────
216
217#[cfg(test)]
218mod tests {
219    use super::*;
220
221    #[test]
222    fn boxcox_capability_skewed_data() {
223        // Right-skewed exponential-like data
224        let data: Vec<f64> = (1..=25).map(|i| (i as f64 * 0.2).exp()).collect();
225        let result = boxcox_capability(&data, Some(150.0), Some(1.0)).unwrap();
226        assert!(result.lambda.abs() < 0.6, "lambda={}", result.lambda);
227        assert!(result.indices.pp.is_some() || result.indices.ppk.is_some());
228    }
229
230    #[test]
231    fn boxcox_capability_non_positive_error() {
232        let data = vec![1.0, -1.0, 2.0, 3.0, 4.0, 5.0];
233        assert!(boxcox_capability(&data, Some(10.0), None).is_err());
234    }
235
236    #[test]
237    fn boxcox_capability_insufficient_data() {
238        let data = vec![1.0, 2.0, 3.0]; // < 4 points
239        assert!(boxcox_capability(&data, Some(10.0), None).is_err());
240    }
241
242    #[test]
243    fn boxcox_capability_lambda_in_range() {
244        let data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
245        let result = boxcox_capability(&data, Some(25.0), Some(0.5)).unwrap();
246        assert!(result.lambda >= -2.0 && result.lambda <= 2.0);
247    }
248
249    #[test]
250    fn boxcox_capability_no_spec_error() {
251        let data: Vec<f64> = (1..=10).map(|i| i as f64).collect();
252        assert!(boxcox_capability(&data, None, None).is_err());
253    }
254
255    #[test]
256    fn boxcox_capability_usl_only() {
257        let data: Vec<f64> = (1..=20).map(|i| i as f64 * 0.5).collect();
258        let result = boxcox_capability(&data, Some(20.0), None).unwrap();
259        // With USL only: pp is None (needs both limits), ppk should be Some
260        assert!(result.indices.ppk.is_some());
261        assert!(result.indices.pp.is_none());
262    }
263
264    #[test]
265    fn boxcox_capability_lsl_only() {
266        let data: Vec<f64> = (1..=20).map(|i| i as f64).collect();
267        let result = boxcox_capability(&data, None, Some(0.5)).unwrap();
268        assert!(result.indices.ppk.is_some());
269        assert!(result.indices.pp.is_none());
270    }
271
272    #[test]
273    fn boxcox_capability_two_sided() {
274        // Both USL and LSL provided → pp, ppk, cp, cpk all Some
275        let data: Vec<f64> = (1..=30).map(|i| (i as f64 * 0.1).exp()).collect();
276        let result = boxcox_capability(&data, Some(20.0), Some(1.0)).unwrap();
277        assert!(result.indices.pp.is_some());
278        assert!(result.indices.ppk.is_some());
279        assert!(result.indices.cp.is_some());
280        assert!(result.indices.cpk.is_some());
281    }
282
283    #[test]
284    fn boxcox_capability_non_positive_spec_error() {
285        let data: Vec<f64> = (1..=10).map(|i| i as f64).collect();
286        // LSL = -1 is non-positive → SpecTransformError
287        assert!(boxcox_capability(&data, Some(20.0), Some(-1.0)).is_err());
288    }
289
290    #[test]
291    fn boxcox_capability_result_has_valid_lambda() {
292        let data: Vec<f64> = (1..=15).map(|i| (i as f64).powi(2)).collect();
293        let result = boxcox_capability(&data, Some(250.0), Some(0.5)).unwrap();
294        assert!(result.lambda.is_finite());
295        assert!(result.lambda >= -2.0 && result.lambda <= 2.0);
296    }
297}