Skip to main content

u_analytics/capability/
percentile.rs

1//! Percentile-based process capability (ISO 22514-2).
2//!
3//! When process data is non-normal, traditional capability indices based on
4//! mean and standard deviation can be misleading. The percentile method uses
5//! empirical quantiles (0.135th and 99.865th percentiles) as the natural
6//! process spread — equivalent to the ±3σ spread for normal data.
7//!
8//! # Indices
9//!
10//! - **Cp\*** — (USL − LSL) / (X₉₉.₈₆₅ − X₀.₁₃₅)
11//! - **Cpk\*** — min(Cpu\*, Cpl\*)
12//! - **Cpu\*** — (USL − median) / (X₉₉.₈₆₅ − median)
13//! - **Cpl\*** — (median − LSL) / (median − X₀.₁₃₅)
14//!
15//! # References
16//!
17//! - ISO 22514-2:2017. "Statistical methods in process management — Capability and performance",
18//!   §4.3, percentile method.
19//! - Clements, J.A. (1989). "Process capability calculations for non-normal distributions."
20
21use u_numflow::stats;
22
23/// Result of percentile-based process capability analysis.
24#[derive(Debug, Clone)]
25pub struct PercentileCapabilityResult {
26    /// Cp* = (USL − LSL) / (X₉₉.₈₆₅ − X₀.₁₃₅). Requires both limits.
27    pub cp_star: Option<f64>,
28    /// Cpk* = min(Cpu*, Cpl*). Requires at least one limit.
29    pub cpk_star: Option<f64>,
30    /// Cpu* = (USL − median) / (X₉₉.₈₆₅ − median). Requires USL.
31    pub cpu_star: Option<f64>,
32    /// Cpl* = (median − LSL) / (median − X₀.₁₃₅). Requires LSL.
33    pub cpl_star: Option<f64>,
34    /// Sample median.
35    pub median: f64,
36    /// 0.135th percentile value (lower natural process limit).
37    pub percentile_lower: f64,
38    /// 99.865th percentile value (upper natural process limit).
39    pub percentile_upper: f64,
40}
41
42/// Compute percentile-based capability indices (ISO 22514-2).
43///
44/// Uses empirical quantiles at 0.00135 and 0.99865 (corresponding to ±3σ
45/// for normal distributions) to define the natural process spread.
46///
47/// # Arguments
48///
49/// * `data` — Process observations (at least 20 data points required).
50/// * `lsl` — Lower specification limit (optional).
51/// * `usl` — Upper specification limit (optional).
52///
53/// # Returns
54///
55/// `Err` if:
56/// - Fewer than 20 data points (insufficient for extreme percentile estimation).
57/// - Neither `usl` nor `lsl` is provided.
58/// - Data contains non-finite values.
59/// - The percentile spread is zero or negative (degenerate data).
60///
61/// # Examples
62///
63/// ```
64/// use u_analytics::capability::percentile_capability;
65///
66/// let data: Vec<f64> = (0..100).map(|i| 10.0 + (i as f64) * 0.1).collect();
67/// let result = percentile_capability(&data, Some(5.0), Some(15.0)).unwrap();
68/// assert!(result.cp_star.is_some());
69/// assert!(result.median > 0.0);
70/// ```
71pub fn percentile_capability(
72    data: &[f64],
73    lsl: Option<f64>,
74    usl: Option<f64>,
75) -> Result<PercentileCapabilityResult, &'static str> {
76    // Validate inputs
77    if usl.is_none() && lsl.is_none() {
78        return Err("at least one specification limit (USL or LSL) is required");
79    }
80    if data.len() < 20 {
81        return Err("at least 20 data points are required for percentile capability");
82    }
83    if data.iter().any(|v| !v.is_finite()) {
84        return Err("all data values must be finite");
85    }
86    if let (Some(u), Some(l)) = (usl, lsl) {
87        if u <= l {
88            return Err("USL must be greater than LSL");
89        }
90    }
91
92    // Compute percentiles using u-numflow (R-7 linear interpolation)
93    let p_lower = stats::quantile(data, 0.00135).ok_or("failed to compute 0.135th percentile")?;
94    let p_upper = stats::quantile(data, 0.99865).ok_or("failed to compute 99.865th percentile")?;
95    let median = stats::quantile(data, 0.5).ok_or("failed to compute median")?;
96
97    // Validate spread
98    let spread = p_upper - p_lower;
99    if spread < 1e-300 {
100        return Err("percentile spread is zero — data has no variation");
101    }
102
103    let upper_spread = p_upper - median;
104    let lower_spread = median - p_lower;
105
106    // Cp* = (USL - LSL) / (p_upper - p_lower)
107    let cp_star = match (usl, lsl) {
108        (Some(u), Some(l)) => Some((u - l) / spread),
109        _ => None,
110    };
111
112    // Cpu* = (USL - median) / (p_upper - median)
113    let cpu_star = usl.and_then(|u| {
114        if upper_spread > 1e-300 {
115            Some((u - median) / upper_spread)
116        } else {
117            None
118        }
119    });
120
121    // Cpl* = (median - LSL) / (median - p_lower)
122    let cpl_star = lsl.and_then(|l| {
123        if lower_spread > 1e-300 {
124            Some((median - l) / lower_spread)
125        } else {
126            None
127        }
128    });
129
130    // Cpk* = min of available Cpu*/Cpl*
131    let cpk_star = match (cpu_star, cpl_star) {
132        (Some(u), Some(l)) => Some(u.min(l)),
133        (Some(u), None) => Some(u),
134        (None, Some(l)) => Some(l),
135        (None, None) => None,
136    };
137
138    Ok(PercentileCapabilityResult {
139        cp_star,
140        cpk_star,
141        cpu_star,
142        cpl_star,
143        median,
144        percentile_lower: p_lower,
145        percentile_upper: p_upper,
146    })
147}
148
149// ---------------------------------------------------------------------------
150// Tests
151// ---------------------------------------------------------------------------
152
153#[cfg(test)]
154mod tests {
155    use super::*;
156
157    /// Generate uniform-like data for testing.
158    fn uniform_data(n: usize) -> Vec<f64> {
159        (0..n)
160            .map(|i| 10.0 + (i as f64) / (n as f64 - 1.0) * 10.0)
161            .collect()
162    }
163
164    #[test]
165    fn basic_two_sided() {
166        let data = uniform_data(100);
167        let result = percentile_capability(&data, Some(5.0), Some(25.0)).unwrap();
168
169        assert!(result.cp_star.is_some());
170        assert!(result.cpk_star.is_some());
171        assert!(result.cpu_star.is_some());
172        assert!(result.cpl_star.is_some());
173        assert!(result.cp_star.unwrap() > 0.0);
174
175        // Median should be approximately the midpoint of the data
176        assert!(
177            (result.median - 15.0).abs() < 0.5,
178            "median should be ~15, got {}",
179            result.median
180        );
181    }
182
183    #[test]
184    fn usl_only() {
185        let data = uniform_data(50);
186        let result = percentile_capability(&data, None, Some(25.0)).unwrap();
187
188        assert!(result.cp_star.is_none(), "Cp* requires both limits");
189        assert!(result.cpu_star.is_some());
190        assert!(result.cpl_star.is_none());
191        assert!(result.cpk_star.is_some());
192        assert_eq!(result.cpk_star.unwrap(), result.cpu_star.unwrap());
193    }
194
195    #[test]
196    fn lsl_only() {
197        let data = uniform_data(50);
198        let result = percentile_capability(&data, Some(5.0), None).unwrap();
199
200        assert!(result.cp_star.is_none());
201        assert!(result.cpu_star.is_none());
202        assert!(result.cpl_star.is_some());
203        assert!(result.cpk_star.is_some());
204        assert_eq!(result.cpk_star.unwrap(), result.cpl_star.unwrap());
205    }
206
207    #[test]
208    fn rejects_insufficient_data() {
209        let data: Vec<f64> = (0..19).map(|i| i as f64).collect();
210        assert!(percentile_capability(&data, Some(0.0), Some(20.0)).is_err());
211    }
212
213    #[test]
214    fn rejects_no_spec_limits() {
215        let data = uniform_data(30);
216        assert!(percentile_capability(&data, None, None).is_err());
217    }
218
219    #[test]
220    fn rejects_non_finite_data() {
221        let mut data = uniform_data(30);
222        data[10] = f64::NAN;
223        assert!(percentile_capability(&data, Some(0.0), Some(30.0)).is_err());
224    }
225
226    #[test]
227    fn rejects_usl_leq_lsl() {
228        let data = uniform_data(30);
229        assert!(percentile_capability(&data, Some(10.0), Some(5.0)).is_err());
230        assert!(percentile_capability(&data, Some(10.0), Some(10.0)).is_err());
231    }
232
233    #[test]
234    fn cpk_star_equals_min_of_cpu_cpl() {
235        let data = uniform_data(100);
236        let result = percentile_capability(&data, Some(5.0), Some(25.0)).unwrap();
237
238        let cpu = result.cpu_star.unwrap();
239        let cpl = result.cpl_star.unwrap();
240        let cpk = result.cpk_star.unwrap();
241        assert!(
242            (cpk - cpu.min(cpl)).abs() < 1e-10,
243            "Cpk* should equal min(Cpu*, Cpl*)"
244        );
245    }
246
247    #[test]
248    fn percentiles_bracket_data() {
249        let data = uniform_data(200);
250        let result = percentile_capability(&data, Some(5.0), Some(25.0)).unwrap();
251
252        // Lower percentile should be near but >= data minimum
253        assert!(
254            result.percentile_lower >= data.iter().copied().fold(f64::INFINITY, f64::min) - 1e-10,
255            "lower percentile below data min"
256        );
257        // Upper percentile should be near but <= data maximum
258        assert!(
259            result.percentile_upper
260                <= data.iter().copied().fold(f64::NEG_INFINITY, f64::max) + 1e-10,
261            "upper percentile above data max"
262        );
263        // p_lower < median < p_upper
264        assert!(result.percentile_lower < result.median);
265        assert!(result.median < result.percentile_upper);
266    }
267
268    #[test]
269    fn symmetric_data_gives_balanced_cpu_cpl() {
270        // Symmetric data centered at 50
271        let data: Vec<f64> = (0..200).map(|i| 40.0 + (i as f64) * 0.1).collect();
272        let lsl = 30.0;
273        let usl = 70.0;
274        let result = percentile_capability(&data, Some(lsl), Some(usl)).unwrap();
275
276        let cpu = result.cpu_star.unwrap();
277        let cpl = result.cpl_star.unwrap();
278
279        // For roughly symmetric data centered between limits, Cpu ≈ Cpl
280        assert!(
281            (cpu - cpl).abs() < cpu * 0.2,
282            "symmetric data should give similar Cpu* ({}) and Cpl* ({})",
283            cpu,
284            cpl
285        );
286    }
287}