Skip to main content

u_analytics/capability/
percentile.rs

1//! Percentile-based process capability (ISO 22514-2).
2//!
3//! When process data is non-normal, traditional capability indices based on
4//! mean and standard deviation can be misleading. The percentile method uses
5//! empirical quantiles (0.135th and 99.865th percentiles) as the natural
6//! process spread — equivalent to the ±3σ spread for normal data.
7//!
8//! # Indices
9//!
10//! - **Cp\*** — (USL − LSL) / (X₉₉.₈₆₅ − X₀.₁₃₅)
11//! - **Cpk\*** — min(Cpu\*, Cpl\*)
12//! - **Cpu\*** — (USL − median) / (X₉₉.₈₆₅ − median)
13//! - **Cpl\*** — (median − LSL) / (median − X₀.₁₃₅)
14//!
15//! # References
16//!
17//! - ISO 22514-2:2017. "Statistical methods in process management — Capability and performance",
18//!   §4.3, percentile method.
19//! - Clements, J.A. (1989). "Process capability calculations for non-normal distributions."
20
21use u_numflow::stats;
22
23/// Result of percentile-based process capability analysis.
24#[derive(Debug, Clone)]
25pub struct PercentileCapabilityResult {
26    /// Cp* = (USL − LSL) / (X₉₉.₈₆₅ − X₀.₁₃₅). Requires both limits.
27    pub cp_star: Option<f64>,
28    /// Cpk* = min(Cpu*, Cpl*). Requires at least one limit.
29    pub cpk_star: Option<f64>,
30    /// Cpu* = (USL − median) / (X₉₉.₈₆₅ − median). Requires USL.
31    pub cpu_star: Option<f64>,
32    /// Cpl* = (median − LSL) / (median − X₀.₁₃₅). Requires LSL.
33    pub cpl_star: Option<f64>,
34    /// Sample median.
35    pub median: f64,
36    /// 0.135th percentile value (lower natural process limit).
37    pub percentile_lower: f64,
38    /// 99.865th percentile value (upper natural process limit).
39    pub percentile_upper: f64,
40}
41
42/// Compute percentile-based capability indices (ISO 22514-2).
43///
44/// Uses empirical quantiles at 0.00135 and 0.99865 (corresponding to ±3σ
45/// for normal distributions) to define the natural process spread.
46///
47/// # Arguments
48///
49/// * `data` — Process observations (at least 20 data points required).
50/// * `lsl` — Lower specification limit (optional).
51/// * `usl` — Upper specification limit (optional).
52///
53/// # Returns
54///
55/// `Err` if:
56/// - Fewer than 20 data points (insufficient for extreme percentile estimation).
57/// - Neither `usl` nor `lsl` is provided.
58/// - Data contains non-finite values.
59/// - The percentile spread is zero or negative (degenerate data).
60///
61/// # Examples
62///
63/// ```
64/// use u_analytics::capability::percentile_capability;
65///
66/// let data: Vec<f64> = (0..100).map(|i| 10.0 + (i as f64) * 0.1).collect();
67/// let result = percentile_capability(&data, Some(5.0), Some(15.0)).unwrap();
68/// assert!(result.cp_star.is_some());
69/// assert!(result.median > 0.0);
70/// ```
71pub fn percentile_capability(
72    data: &[f64],
73    lsl: Option<f64>,
74    usl: Option<f64>,
75) -> Result<PercentileCapabilityResult, &'static str> {
76    // Validate inputs
77    if usl.is_none() && lsl.is_none() {
78        return Err("at least one specification limit (USL or LSL) is required");
79    }
80    if data.len() < 20 {
81        return Err("at least 20 data points are required for percentile capability");
82    }
83    if data.iter().any(|v| !v.is_finite()) {
84        return Err("all data values must be finite");
85    }
86    if let (Some(u), Some(l)) = (usl, lsl) {
87        if u <= l {
88            return Err("USL must be greater than LSL");
89        }
90    }
91
92    // Compute percentiles using u-numflow (R-7 linear interpolation)
93    let p_lower = stats::quantile(data, 0.00135)
94        .ok_or("failed to compute 0.135th percentile")?;
95    let p_upper = stats::quantile(data, 0.99865)
96        .ok_or("failed to compute 99.865th percentile")?;
97    let median = stats::quantile(data, 0.5)
98        .ok_or("failed to compute median")?;
99
100    // Validate spread
101    let spread = p_upper - p_lower;
102    if spread < 1e-300 {
103        return Err("percentile spread is zero — data has no variation");
104    }
105
106    let upper_spread = p_upper - median;
107    let lower_spread = median - p_lower;
108
109    // Cp* = (USL - LSL) / (p_upper - p_lower)
110    let cp_star = match (usl, lsl) {
111        (Some(u), Some(l)) => Some((u - l) / spread),
112        _ => None,
113    };
114
115    // Cpu* = (USL - median) / (p_upper - median)
116    let cpu_star = usl.and_then(|u| {
117        if upper_spread > 1e-300 {
118            Some((u - median) / upper_spread)
119        } else {
120            None
121        }
122    });
123
124    // Cpl* = (median - LSL) / (median - p_lower)
125    let cpl_star = lsl.and_then(|l| {
126        if lower_spread > 1e-300 {
127            Some((median - l) / lower_spread)
128        } else {
129            None
130        }
131    });
132
133    // Cpk* = min of available Cpu*/Cpl*
134    let cpk_star = match (cpu_star, cpl_star) {
135        (Some(u), Some(l)) => Some(u.min(l)),
136        (Some(u), None) => Some(u),
137        (None, Some(l)) => Some(l),
138        (None, None) => None,
139    };
140
141    Ok(PercentileCapabilityResult {
142        cp_star,
143        cpk_star,
144        cpu_star,
145        cpl_star,
146        median,
147        percentile_lower: p_lower,
148        percentile_upper: p_upper,
149    })
150}
151
152// ---------------------------------------------------------------------------
153// Tests
154// ---------------------------------------------------------------------------
155
156#[cfg(test)]
157mod tests {
158    use super::*;
159
160    /// Generate uniform-like data for testing.
161    fn uniform_data(n: usize) -> Vec<f64> {
162        (0..n).map(|i| 10.0 + (i as f64) / (n as f64 - 1.0) * 10.0).collect()
163    }
164
165    #[test]
166    fn basic_two_sided() {
167        let data = uniform_data(100);
168        let result = percentile_capability(&data, Some(5.0), Some(25.0)).unwrap();
169
170        assert!(result.cp_star.is_some());
171        assert!(result.cpk_star.is_some());
172        assert!(result.cpu_star.is_some());
173        assert!(result.cpl_star.is_some());
174        assert!(result.cp_star.unwrap() > 0.0);
175
176        // Median should be approximately the midpoint of the data
177        assert!(
178            (result.median - 15.0).abs() < 0.5,
179            "median should be ~15, got {}",
180            result.median
181        );
182    }
183
184    #[test]
185    fn usl_only() {
186        let data = uniform_data(50);
187        let result = percentile_capability(&data, None, Some(25.0)).unwrap();
188
189        assert!(result.cp_star.is_none(), "Cp* requires both limits");
190        assert!(result.cpu_star.is_some());
191        assert!(result.cpl_star.is_none());
192        assert!(result.cpk_star.is_some());
193        assert_eq!(result.cpk_star.unwrap(), result.cpu_star.unwrap());
194    }
195
196    #[test]
197    fn lsl_only() {
198        let data = uniform_data(50);
199        let result = percentile_capability(&data, Some(5.0), None).unwrap();
200
201        assert!(result.cp_star.is_none());
202        assert!(result.cpu_star.is_none());
203        assert!(result.cpl_star.is_some());
204        assert!(result.cpk_star.is_some());
205        assert_eq!(result.cpk_star.unwrap(), result.cpl_star.unwrap());
206    }
207
208    #[test]
209    fn rejects_insufficient_data() {
210        let data: Vec<f64> = (0..19).map(|i| i as f64).collect();
211        assert!(percentile_capability(&data, Some(0.0), Some(20.0)).is_err());
212    }
213
214    #[test]
215    fn rejects_no_spec_limits() {
216        let data = uniform_data(30);
217        assert!(percentile_capability(&data, None, None).is_err());
218    }
219
220    #[test]
221    fn rejects_non_finite_data() {
222        let mut data = uniform_data(30);
223        data[10] = f64::NAN;
224        assert!(percentile_capability(&data, Some(0.0), Some(30.0)).is_err());
225    }
226
227    #[test]
228    fn rejects_usl_leq_lsl() {
229        let data = uniform_data(30);
230        assert!(percentile_capability(&data, Some(10.0), Some(5.0)).is_err());
231        assert!(percentile_capability(&data, Some(10.0), Some(10.0)).is_err());
232    }
233
234    #[test]
235    fn cpk_star_equals_min_of_cpu_cpl() {
236        let data = uniform_data(100);
237        let result = percentile_capability(&data, Some(5.0), Some(25.0)).unwrap();
238
239        let cpu = result.cpu_star.unwrap();
240        let cpl = result.cpl_star.unwrap();
241        let cpk = result.cpk_star.unwrap();
242        assert!(
243            (cpk - cpu.min(cpl)).abs() < 1e-10,
244            "Cpk* should equal min(Cpu*, Cpl*)"
245        );
246    }
247
248    #[test]
249    fn percentiles_bracket_data() {
250        let data = uniform_data(200);
251        let result = percentile_capability(&data, Some(5.0), Some(25.0)).unwrap();
252
253        // Lower percentile should be near but >= data minimum
254        assert!(
255            result.percentile_lower >= data.iter().copied().fold(f64::INFINITY, f64::min) - 1e-10,
256            "lower percentile below data min"
257        );
258        // Upper percentile should be near but <= data maximum
259        assert!(
260            result.percentile_upper
261                <= data
262                    .iter()
263                    .copied()
264                    .fold(f64::NEG_INFINITY, f64::max)
265                    + 1e-10,
266            "upper percentile above data max"
267        );
268        // p_lower < median < p_upper
269        assert!(result.percentile_lower < result.median);
270        assert!(result.median < result.percentile_upper);
271    }
272
273    #[test]
274    fn symmetric_data_gives_balanced_cpu_cpl() {
275        // Symmetric data centered at 50
276        let data: Vec<f64> = (0..200).map(|i| 40.0 + (i as f64) * 0.1).collect();
277        let lsl = 30.0;
278        let usl = 70.0;
279        let result = percentile_capability(&data, Some(lsl), Some(usl)).unwrap();
280
281        let cpu = result.cpu_star.unwrap();
282        let cpl = result.cpl_star.unwrap();
283
284        // For roughly symmetric data centered between limits, Cpu ≈ Cpl
285        assert!(
286            (cpu - cpl).abs() < cpu * 0.2,
287            "symmetric data should give similar Cpu* ({}) and Cpl* ({})",
288            cpu,
289            cpl
290        );
291    }
292}