Skip to main content

plotkit_core/charts/
boxplot.rs

//! Box plot statistics and builder methods.
//!
//! Provides [`BoxStats`] and [`compute_stats`] for summarising raw data, plus a
//! fluent builder API for configuring [`BoxPlotArtist`] instances. Each builder
//! method returns `&mut Self`, allowing calls to be chained together for
//! concise, readable chart construction.
6
7use crate::artist::BoxPlotArtist;
8use crate::primitives::Color;
9
/// Summary statistics for a single group in a box plot.
///
/// Holds the quartiles, whisker endpoints, and outliers computed from
/// raw data. The whisker limits are determined by the interquartile
/// range multiplied by a configurable factor (conventionally 1.5).
///
/// The [`Default`] value has every statistic set to `0.0` and no outliers,
/// which is the same value [`compute_stats`] produces for a group that
/// contains no finite data.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct BoxStats {
    /// First quartile (25th percentile).
    pub q1: f64,
    /// Median (50th percentile).
    pub median: f64,
    /// Third quartile (75th percentile).
    pub q3: f64,
    /// Lower whisker endpoint (lowest datum within Q1 - factor * IQR).
    pub whisker_low: f64,
    /// Upper whisker endpoint (highest datum within Q3 + factor * IQR).
    pub whisker_high: f64,
    /// Data points falling outside the whisker range.
    pub outliers: Vec<f64>,
}
30
/// Computes the linear-interpolation percentile of a sorted slice.
///
/// Uses the same interpolation scheme as NumPy's default (`linear`) method:
/// the fractional rank `p * (len - 1)` is located between its two
/// neighbouring elements and the result is a weighted blend of them.
///
/// `sorted` must be in ascending order. `p` is a fraction in `[0.0, 1.0]`;
/// values outside that range are clamped to it, so the result never
/// extrapolates beyond the data. A NaN `p` yields the first element.
///
/// The result is always within the closed interval spanned by the two
/// neighbouring elements, so percentiles of the same slice are ordered
/// consistently (`percentile(s, 0.25) <= percentile(s, 0.75)`).
///
/// # Panics
///
/// Panics if `sorted` is empty.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    assert!(!sorted.is_empty(), "percentile requires non-empty data");
    let last = sorted.len() - 1;

    // Clamp first: a negative `p` would otherwise saturate the index to 0 but
    // keep a negative weight, extrapolating below the smallest element.
    let rank = p.clamp(0.0, 1.0) * last as f64;
    let lo = rank.floor() as usize;
    if lo >= last {
        // Single-element input, or `p` at the upper end: nothing to blend.
        return sorted[last];
    }

    let (below, above) = (sorted[lo], sorted[lo + 1]);
    let frac = rank - lo as f64;
    let blended = below * (1.0 - frac) + above * frac;

    // Rounding in the weighted sum can land a hair outside [below, above]
    // (most visibly when both neighbours are equal). Pin the result back so
    // quartiles of constant data are exact and never cross each other.
    blended.max(below).min(above)
}
50
51/// Computes box plot statistics for a single dataset.
52///
53/// The `whisker_factor` controls how far the whiskers extend beyond Q1 and Q3,
54/// as a multiple of the interquartile range (IQR). The standard value is 1.5.
55/// Points outside the whisker range are classified as outliers.
56pub fn compute_stats(data: &[f64], whisker_factor: f64) -> BoxStats {
57    let mut sorted: Vec<f64> = data.iter().copied().filter(|v| v.is_finite()).collect();
58    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
59
60    if sorted.is_empty() {
61        return BoxStats {
62            q1: 0.0,
63            median: 0.0,
64            q3: 0.0,
65            whisker_low: 0.0,
66            whisker_high: 0.0,
67            outliers: vec![],
68        };
69    }
70
71    let q1 = percentile(&sorted, 0.25);
72    let median = percentile(&sorted, 0.5);
73    let q3 = percentile(&sorted, 0.75);
74    let iqr = q3 - q1;
75
76    let fence_low = q1 - whisker_factor * iqr;
77    let fence_high = q3 + whisker_factor * iqr;
78
79    // Whisker endpoints are the most extreme data points within the fences.
80    let whisker_low = sorted
81        .iter()
82        .copied()
83        .find(|&v| v >= fence_low)
84        .unwrap_or(q1);
85    let whisker_high = sorted
86        .iter()
87        .rev()
88        .copied()
89        .find(|&v| v <= fence_high)
90        .unwrap_or(q3);
91
92    let outliers: Vec<f64> = sorted
93        .iter()
94        .copied()
95        .filter(|&v| v < whisker_low || v > whisker_high)
96        .collect();
97
98    BoxStats {
99        q1,
100        median,
101        q3,
102        whisker_low,
103        whisker_high,
104        outliers,
105    }
106}
107
108impl BoxPlotArtist {
109    /// Sets the box fill color.
110    ///
111    /// Applies the given [`Color`] to every box rendered by this artist.
112    pub fn color(&mut self, color: Color) -> &mut Self {
113        self.color = color;
114        self
115    }
116
117    /// Sets the legend label.
118    ///
119    /// When a legend is displayed on the figure, this label will appear
120    /// next to the color swatch for this box plot.
121    pub fn label(&mut self, label: &str) -> &mut Self {
122        self.label = Some(label.to_string());
123        self
124    }
125
126    /// Sets the opacity.
127    ///
128    /// The value is clamped to the range [0.0, 1.0], where 0.0 is fully
129    /// transparent and 1.0 is fully opaque.
130    pub fn alpha(&mut self, alpha: f64) -> &mut Self {
131        self.alpha = alpha.clamp(0.0, 1.0);
132        self
133    }
134
135    /// Sets the box width as a fraction of the category spacing.
136    ///
137    /// Smaller values produce thinner boxes with more whitespace between them.
138    /// The value is clamped to [0.1, 1.0].
139    pub fn width(&mut self, width: f64) -> &mut Self {
140        self.box_width = width.clamp(0.1, 1.0);
141        self
142    }
143
144    /// Controls whether outlier points are drawn.
145    ///
146    /// When `true` (the default), data points beyond the whiskers are rendered
147    /// as individual dots. When `false`, outliers are hidden.
148    pub fn show_outliers(&mut self, show: bool) -> &mut Self {
149        self.show_outliers = show;
150        self
151    }
152
153    /// Sets the whisker extent factor.
154    ///
155    /// Whiskers extend from Q1 and Q3 by this factor multiplied by the
156    /// interquartile range (IQR). The standard value is 1.5. Changing this
157    /// value recomputes the box statistics from the stored raw data.
158    pub fn whisker_factor(&mut self, factor: f64) -> &mut Self {
159        self.whisker_iq_factor = factor;
160        // Recompute stats with the new factor.
161        self.stats = self
162            .raw_data
163            .iter()
164            .map(|d| compute_stats(d, factor))
165            .collect();
166        self
167    }
168}
169
170// ---------------------------------------------------------------------------
171// Tests
172// ---------------------------------------------------------------------------
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for every floating-point comparison below.
    const TOL: f64 = 1e-10;

    /// Asserts that `actual` is within [`TOL`] of `expected`.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < TOL,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// The values 1.0 through 10.0; no outliers at the standard factor.
    fn one_to_ten() -> Vec<f64> {
        (1..=10).map(f64::from).collect()
    }

    /// The values 1.0 through 9.0 followed by a single far-away point.
    fn one_to_nine_and_hundred() -> Vec<f64> {
        vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0]
    }

    /// Builds an artist over `raw` with stats computed at the standard factor.
    fn artist_for(raw: Vec<Vec<f64>>, labels: &[&str]) -> BoxPlotArtist {
        let stats = raw.iter().map(|group| compute_stats(group, 1.5)).collect();
        BoxPlotArtist {
            stats,
            labels: labels.iter().map(|name| name.to_string()).collect(),
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            box_width: 0.5,
            show_outliers: true,
            whisker_iq_factor: 1.5,
            raw_data: raw,
        }
    }

    /// Helper to create a sample BoxPlotArtist for builder tests.
    fn sample_boxplot_artist() -> BoxPlotArtist {
        artist_for(vec![one_to_ten()], &["Group 1"])
    }

    #[test]
    fn percentile_single_value() {
        assert_close(percentile(&[5.0], 0.5), 5.0);
    }

    #[test]
    fn percentile_two_values() {
        let data = [2.0, 8.0];
        assert_close(percentile(&data, 0.0), 2.0);
        assert_close(percentile(&data, 1.0), 8.0);
        assert_close(percentile(&data, 0.5), 5.0);
    }

    #[test]
    fn percentile_interpolation() {
        let data = [1.0, 2.0, 3.0, 4.0];
        assert_close(percentile(&data, 0.25), 1.75);
        assert_close(percentile(&data, 0.5), 2.5);
        assert_close(percentile(&data, 0.75), 3.25);
    }

    #[test]
    fn compute_stats_basic() {
        let stats = compute_stats(&one_to_ten(), 1.5);
        assert_close(stats.median, 5.5);
        assert_close(stats.q1, 3.25);
        assert_close(stats.q3, 7.75);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_with_outliers() {
        let stats = compute_stats(&one_to_nine_and_hundred(), 1.5);
        assert_eq!(stats.outliers, vec![100.0]);
        // The whisker stops at the last datum inside the fence.
        assert_close(stats.whisker_high, 9.0);
    }

    #[test]
    fn compute_stats_empty_data() {
        let stats = compute_stats(&[], 1.5);
        assert_close(stats.q1, 0.0);
        assert_close(stats.median, 0.0);
        assert_close(stats.q3, 0.0);
        assert_close(stats.whisker_low, 0.0);
        assert_close(stats.whisker_high, 0.0);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_single_value() {
        let stats = compute_stats(&[42.0], 1.5);
        assert_close(stats.q1, 42.0);
        assert_close(stats.median, 42.0);
        assert_close(stats.q3, 42.0);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_two_values() {
        let stats = compute_stats(&[3.0, 7.0], 1.5);
        assert_close(stats.median, 5.0);
        assert_close(stats.q1, 4.0);
        assert_close(stats.q3, 6.0);
    }

    #[test]
    fn compute_stats_nan_filtered() {
        let data = vec![f64::NAN, 1.0, 2.0, 3.0, f64::NAN];
        let stats = compute_stats(&data, 1.5);
        assert_close(stats.median, 2.0);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_whisker_endpoints() {
        let stats = compute_stats(&one_to_ten(), 1.5);
        assert_close(stats.whisker_low, 1.0);
        assert_close(stats.whisker_high, 10.0);
    }

    #[test]
    fn compute_stats_custom_whisker_factor() {
        let data = one_to_nine_and_hundred();
        // Q1 = 3.25, Q3 = 7.75, IQR = 4.5: a factor of 0.1 puts the fences at
        // 2.8 and 8.2, while a factor of 3.0 puts them at -10.25 and 21.25.
        let stats_narrow = compute_stats(&data, 0.1);
        let stats_wide = compute_stats(&data, 3.0);
        assert_eq!(stats_narrow.outliers, vec![1.0, 2.0, 9.0, 100.0]);
        assert_eq!(stats_wide.outliers, vec![100.0]);
        assert!(stats_narrow.outliers.len() > stats_wide.outliers.len());
    }

    #[test]
    fn builder_color() {
        let mut artist = sample_boxplot_artist();
        artist.color(Color::TAB_RED);
        assert_eq!(artist.color, Color::TAB_RED);
    }

    #[test]
    fn builder_label() {
        let mut artist = sample_boxplot_artist();
        artist.label("my boxplot");
        assert_eq!(artist.label.as_deref(), Some("my boxplot"));
    }

    #[test]
    fn builder_alpha() {
        let mut artist = sample_boxplot_artist();
        artist.alpha(0.5);
        assert_close(artist.alpha, 0.5);
    }

    #[test]
    fn builder_alpha_clamped() {
        let mut artist = sample_boxplot_artist();
        artist.alpha(2.0);
        assert_close(artist.alpha, 1.0);
        artist.alpha(-1.0);
        assert_close(artist.alpha, 0.0);
    }

    #[test]
    fn builder_width() {
        let mut artist = sample_boxplot_artist();
        artist.width(0.5);
        assert_close(artist.box_width, 0.5);
    }

    #[test]
    fn builder_width_clamped() {
        let mut artist = sample_boxplot_artist();
        artist.width(0.01);
        assert_close(artist.box_width, 0.1);
        artist.width(5.0);
        assert_close(artist.box_width, 1.0);
    }

    #[test]
    fn builder_show_outliers() {
        let mut artist = sample_boxplot_artist();
        artist.show_outliers(false);
        assert!(!artist.show_outliers);
    }

    #[test]
    fn builder_whisker_factor_recomputes() {
        let mut artist = sample_boxplot_artist();
        let before = artist.stats[0].clone();
        assert!(before.outliers.is_empty());

        // A factor of 0.1 moves the fences to 2.8 and 8.2, which changes both
        // whiskers and the outlier set; stale stats would fail every check below.
        artist.whisker_factor(0.1);

        assert_close(artist.whisker_iq_factor, 0.1);
        assert_eq!(artist.stats.len(), 1);
        let after = &artist.stats[0];
        // The quartiles depend only on the data, not on the factor.
        assert_close(after.q1, before.q1);
        assert_close(after.median, before.median);
        assert_close(after.q3, before.q3);
        assert_close(after.whisker_low, 3.0);
        assert_close(after.whisker_high, 8.0);
        assert_eq!(after.outliers, vec![1.0, 2.0, 9.0, 10.0]);
    }

    #[test]
    fn data_bounds_single_group() {
        let artist = sample_boxplot_artist();
        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
        assert_close(xmin, -0.5);
        assert_close(xmax, 0.5);
        assert!(ymin <= artist.stats[0].whisker_low);
        assert!(ymax >= artist.stats[0].whisker_high);
    }

    #[test]
    fn data_bounds_multiple_groups() {
        let raw = vec![vec![1.0, 2.0, 3.0], vec![10.0, 20.0, 30.0]];
        let artist = artist_for(raw, &["A", "B"]);
        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
        assert_close(xmin, -0.5);
        assert_close(xmax, 1.5);
        assert!(ymin <= 1.0);
        assert!(ymax >= 30.0);
    }
}