Skip to main content

fdars_core/alignment/
phase_boxplot.rs

1//! Phase (warping) box plots for functional data.
2//!
3//! Constructs functional box plots on warping functions to visualize
4//! phase variation. The median warp is the deepest warp by modified band
5//! depth, the central region contains the 50% deepest warps, and
6//! outliers are identified beyond the whisker fences.
7//!
8//! # References
9//!
10//! - Lopez-Pintado, S. & Romo, J. (2009). On the concept of depth for
11//!   functional data. *Journal of the American Statistical Association*,
12//!   104(486), 718-734.
13
14use crate::depth::modified_band_1d;
15use crate::error::FdarError;
16use crate::matrix::FdMatrix;
17
18/// Result of computing a phase (warping) box plot.
19///
20/// Contains the median warp, the 50% central envelope, whisker
21/// fences, depth values, and outlier indices.
22#[derive(Debug, Clone, PartialEq)]
23#[non_exhaustive]
24pub struct PhaseBoxplot {
25    /// Median warping function (deepest by modified band depth).
26    pub median: Vec<f64>,
27    /// Index of the median warp in the original matrix.
28    pub median_index: usize,
29    /// Lower boundary of the 50% central region (pointwise min of central warps).
30    pub central_lower: Vec<f64>,
31    /// Upper boundary of the 50% central region (pointwise max of central warps).
32    pub central_upper: Vec<f64>,
33    /// Lower whisker fence (trimmed to non-outlier envelope).
34    pub whisker_lower: Vec<f64>,
35    /// Upper whisker fence (trimmed to non-outlier envelope).
36    pub whisker_upper: Vec<f64>,
37    /// Modified band depth of each warp.
38    pub depths: Vec<f64>,
39    /// Indices of outlier warps (beyond whisker fences).
40    pub outlier_indices: Vec<usize>,
41    /// Whisker factor used (analogous to 1.5 in classical box plots).
42    pub factor: f64,
43}
44
45/// Compute a phase box plot from a set of warping functions.
46///
47/// Ranks warps by modified band depth, identifies the median (deepest warp),
48/// builds the 50% central envelope, and flags outliers beyond the whisker
49/// fences. Whiskers are trimmed to the actual envelope of non-outlier warps.
50///
51/// # Arguments
52/// * `gammas` — Warping functions (n x m, one warp per row)
53/// * `argvals` — Evaluation grid (length m)
54/// * `factor` — Whisker extent as a multiple of the IQR envelope (e.g. 1.5)
55///
56/// # Errors
57/// Returns `FdarError::InvalidDimension` if n < 3 or column count differs
58/// from `argvals`. Returns `FdarError::InvalidParameter` if `factor` is not
59/// positive.
60///
61/// # Examples
62///
63/// ```
64/// use fdars_core::matrix::FdMatrix;
65/// use fdars_core::alignment::phase_boxplot;
66///
67/// let m = 20;
68/// let argvals: Vec<f64> = (0..m).map(|i| i as f64 / (m - 1) as f64).collect();
69/// // 5 slightly shifted warps
70/// let mut data = vec![0.0; 5 * m];
71/// for i in 0..5 {
72///     let shift = 0.02 * (i as f64 - 2.0);
73///     for j in 0..m {
74///         let t = argvals[j];
75///         data[j * 5 + i] = (t + shift * t * (1.0 - t)).clamp(0.0, 1.0);
76///     }
77/// }
78/// let gammas = FdMatrix::from_column_major(data, 5, m).unwrap();
79/// let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
80/// assert_eq!(bp.median.len(), m);
81/// assert!(bp.depths.iter().all(|&d| d >= 0.0));
82/// ```
83#[must_use = "box plot result should not be discarded"]
84pub fn phase_boxplot(
85    gammas: &FdMatrix,
86    argvals: &[f64],
87    factor: f64,
88) -> Result<PhaseBoxplot, FdarError> {
89    let n = gammas.nrows();
90    let m = gammas.ncols();
91
92    if n < 3 {
93        return Err(FdarError::InvalidDimension {
94            parameter: "gammas",
95            expected: "at least 3 rows".to_string(),
96            actual: format!("{n} rows"),
97        });
98    }
99    if m != argvals.len() {
100        return Err(FdarError::InvalidDimension {
101            parameter: "argvals",
102            expected: format!("length {m}"),
103            actual: format!("length {}", argvals.len()),
104        });
105    }
106    if factor <= 0.0 || !factor.is_finite() {
107        return Err(FdarError::InvalidParameter {
108            parameter: "factor",
109            message: format!("must be positive and finite, got {factor}"),
110        });
111    }
112
113    // Step 1: Compute modified band depths.
114    let depths = modified_band_1d(gammas, gammas);
115
116    // Step 2: Sort indices by depth (descending).
117    let mut order: Vec<usize> = (0..n).collect();
118    order.sort_by(|&a, &b| {
119        depths[b]
120            .partial_cmp(&depths[a])
121            .unwrap_or(std::cmp::Ordering::Equal)
122    });
123
124    let median_index = order[0];
125    let median = gammas.row(median_index);
126
127    // Step 3: Top ceil(n/2) warps form the central region.
128    let n_central = n.div_ceil(2);
129    let central_indices = &order[..n_central];
130
131    // Step 4: Central envelope (pointwise min/max).
132    let mut central_lower = vec![f64::INFINITY; m];
133    let mut central_upper = vec![f64::NEG_INFINITY; m];
134    for &idx in central_indices {
135        for j in 0..m {
136            let v = gammas[(idx, j)];
137            if v < central_lower[j] {
138                central_lower[j] = v;
139            }
140            if v > central_upper[j] {
141                central_upper[j] = v;
142            }
143        }
144    }
145
146    // Step 5: IQR envelope and whisker fences.
147    let domain_lo = argvals[0];
148    let domain_hi = argvals[m - 1];
149    let mut fence_lower = vec![0.0; m];
150    let mut fence_upper = vec![0.0; m];
151    for j in 0..m {
152        let iqr = central_upper[j] - central_lower[j];
153        fence_lower[j] = (central_lower[j] - factor * iqr).max(domain_lo);
154        fence_upper[j] = (central_upper[j] + factor * iqr).min(domain_hi);
155    }
156
157    // Step 6: Identify outliers (any warp exceeding fences at any grid point).
158    let mut outlier_indices = Vec::new();
159    for i in 0..n {
160        let mut is_outlier = false;
161        for j in 0..m {
162            let v = gammas[(i, j)];
163            if v < fence_lower[j] || v > fence_upper[j] {
164                is_outlier = true;
165                break;
166            }
167        }
168        if is_outlier {
169            outlier_indices.push(i);
170        }
171    }
172
173    // Step 7: Trim whiskers to actual non-outlier envelope.
174    let mut whisker_lower = vec![f64::INFINITY; m];
175    let mut whisker_upper = vec![f64::NEG_INFINITY; m];
176    for i in 0..n {
177        if outlier_indices.contains(&i) {
178            continue;
179        }
180        for j in 0..m {
181            let v = gammas[(i, j)];
182            if v < whisker_lower[j] {
183                whisker_lower[j] = v;
184            }
185            if v > whisker_upper[j] {
186                whisker_upper[j] = v;
187            }
188        }
189    }
190
191    // If all warps are outliers (unlikely but defensive), fall back to fences.
192    if outlier_indices.len() == n {
193        whisker_lower = fence_lower;
194        whisker_upper = fence_upper;
195    }
196
197    Ok(PhaseBoxplot {
198        median,
199        median_index,
200        central_lower,
201        central_upper,
202        whisker_lower,
203        whisker_upper,
204        depths,
205        outlier_indices,
206        factor,
207    })
208}
209
210#[cfg(test)]
211mod tests {
212    use super::*;
213
214    fn uniform_grid(n: usize) -> Vec<f64> {
215        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
216    }
217
218    /// Build n identity-like warps with small perturbations.
219    fn make_warps(n: usize, m: usize) -> (FdMatrix, Vec<f64>) {
220        let argvals = uniform_grid(m);
221        let mut data = vec![0.0; n * m];
222        for i in 0..n {
223            let shift = 0.03 * (i as f64 - (n as f64 - 1.0) / 2.0);
224            for j in 0..m {
225                let t = argvals[j];
226                // Smooth perturbation preserving [0,1] boundary
227                let v = t + shift * t * (1.0 - t);
228                data[j * n + i] = v.clamp(0.0, 1.0);
229            }
230        }
231        let gammas = FdMatrix::from_column_major(data, n, m).unwrap();
232        (gammas, argvals)
233    }
234
235    #[test]
236    fn too_few_curves_rejected() {
237        let argvals = uniform_grid(10);
238        let gammas = FdMatrix::zeros(2, 10);
239        let err = phase_boxplot(&gammas, &argvals, 1.5).unwrap_err();
240        assert!(matches!(err, FdarError::InvalidDimension { .. }));
241    }
242
243    #[test]
244    fn argvals_length_mismatch_rejected() {
245        let gammas = FdMatrix::zeros(5, 10);
246        let argvals = uniform_grid(8);
247        let err = phase_boxplot(&gammas, &argvals, 1.5).unwrap_err();
248        assert!(matches!(err, FdarError::InvalidDimension { .. }));
249    }
250
251    #[test]
252    fn negative_factor_rejected() {
253        let (gammas, argvals) = make_warps(5, 20);
254        let err = phase_boxplot(&gammas, &argvals, -1.0).unwrap_err();
255        assert!(matches!(err, FdarError::InvalidParameter { .. }));
256    }
257
258    #[test]
259    fn zero_factor_rejected() {
260        let (gammas, argvals) = make_warps(5, 20);
261        let err = phase_boxplot(&gammas, &argvals, 0.0).unwrap_err();
262        assert!(matches!(err, FdarError::InvalidParameter { .. }));
263    }
264
265    #[test]
266    fn basic_structure() {
267        let (gammas, argvals) = make_warps(7, 30);
268        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
269
270        assert_eq!(bp.median.len(), 30);
271        assert_eq!(bp.central_lower.len(), 30);
272        assert_eq!(bp.central_upper.len(), 30);
273        assert_eq!(bp.whisker_lower.len(), 30);
274        assert_eq!(bp.whisker_upper.len(), 30);
275        assert_eq!(bp.depths.len(), 7);
276        assert_eq!(bp.factor, 1.5);
277        assert!(bp.median_index < 7);
278    }
279
280    #[test]
281    fn central_envelope_inside_whiskers() {
282        let (gammas, argvals) = make_warps(10, 25);
283        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
284
285        for j in 0..25 {
286            assert!(
287                bp.central_lower[j] >= bp.whisker_lower[j] - 1e-12,
288                "central_lower should be >= whisker_lower at j={j}"
289            );
290            assert!(
291                bp.central_upper[j] <= bp.whisker_upper[j] + 1e-12,
292                "central_upper should be <= whisker_upper at j={j}"
293            );
294            assert!(
295                bp.central_lower[j] <= bp.central_upper[j] + 1e-12,
296                "central_lower should be <= central_upper at j={j}"
297            );
298        }
299    }
300
301    #[test]
302    fn median_inside_central_region() {
303        let (gammas, argvals) = make_warps(9, 20);
304        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
305
306        for j in 0..20 {
307            assert!(
308                bp.median[j] >= bp.central_lower[j] - 1e-12,
309                "median should be >= central_lower at j={j}"
310            );
311            assert!(
312                bp.median[j] <= bp.central_upper[j] + 1e-12,
313                "median should be <= central_upper at j={j}"
314            );
315        }
316    }
317
318    #[test]
319    fn whiskers_within_domain() {
320        let (gammas, argvals) = make_warps(8, 20);
321        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
322
323        let lo = argvals[0];
324        let hi = argvals[19];
325        for j in 0..20 {
326            assert!(
327                bp.whisker_lower[j] >= lo - 1e-12,
328                "whisker_lower should be >= domain lo at j={j}"
329            );
330            assert!(
331                bp.whisker_upper[j] <= hi + 1e-12,
332                "whisker_upper should be <= domain hi at j={j}"
333            );
334        }
335    }
336
337    #[test]
338    fn identical_warps_no_outliers() {
339        let m = 15;
340        let argvals = uniform_grid(m);
341        // All curves are the identity warp
342        let mut data = vec![0.0; 5 * m];
343        for i in 0..5 {
344            for j in 0..m {
345                data[j * 5 + i] = argvals[j];
346            }
347        }
348        let gammas = FdMatrix::from_column_major(data, 5, m).unwrap();
349        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
350
351        assert!(
352            bp.outlier_indices.is_empty(),
353            "identical warps should have no outliers"
354        );
355    }
356
357    #[test]
358    fn extreme_warp_is_outlier() {
359        let m = 20;
360        let argvals = uniform_grid(m);
361
362        // 4 near-identity warps + 1 extreme warp
363        let n = 5;
364        let mut data = vec![0.0; n * m];
365        for i in 0..4 {
366            let shift = 0.01 * (i as f64 - 1.5);
367            for j in 0..m {
368                let t = argvals[j];
369                data[j * n + i] = (t + shift * t * (1.0 - t)).clamp(0.0, 1.0);
370            }
371        }
372        // Extreme warp: very fast early, very slow late
373        for j in 0..m {
374            let t = argvals[j];
375            let extreme = (t * t).clamp(0.0, 1.0);
376            data[j * n + 4] = extreme;
377        }
378
379        let gammas = FdMatrix::from_column_major(data, n, m).unwrap();
380        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
381
382        assert!(
383            bp.outlier_indices.contains(&4),
384            "extreme warp (index 4) should be an outlier, got {:?}",
385            bp.outlier_indices
386        );
387    }
388
389    #[test]
390    fn depths_are_nonnegative() {
391        let (gammas, argvals) = make_warps(6, 20);
392        let bp = phase_boxplot(&gammas, &argvals, 1.5).unwrap();
393
394        assert!(
395            bp.depths.iter().all(|&d| d >= 0.0),
396            "all depths should be non-negative"
397        );
398    }
399}