Skip to main content

plotkit_core/charts/
boxplot.rs

//! Box plot statistics and builder methods.
//!
//! Provides [`compute_stats`] for deriving per-group [`BoxStats`] from raw
//! data, plus a fluent builder API for configuring [`BoxPlotArtist`] instances.
//! Each builder method returns `&mut Self`, allowing calls to be chained
//! together for concise, readable chart construction.
6
7use crate::artist::BoxPlotArtist;
8use crate::primitives::Color;
9
/// Summary statistics for a single group in a box plot.
///
/// Holds the quartiles, whisker endpoints, and outliers computed from raw
/// data by [`compute_stats`]. The whisker limits are determined by the
/// interquartile range (IQR = Q3 - Q1) multiplied by a configurable factor
/// (conventionally 1.5).
///
/// Implements [`PartialEq`] so that two sets of statistics can be compared
/// directly (for example in tests). Comparison is exact floating-point
/// equality, field by field.
#[derive(Debug, Clone, PartialEq)]
pub struct BoxStats {
    /// First quartile (25th percentile).
    pub q1: f64,
    /// Median (50th percentile).
    pub median: f64,
    /// Third quartile (75th percentile).
    pub q3: f64,
    /// Lower whisker endpoint: the lowest datum that is not below
    /// `Q1 - factor * IQR`.
    pub whisker_low: f64,
    /// Upper whisker endpoint: the highest datum that is not above
    /// `Q3 + factor * IQR`.
    pub whisker_high: f64,
    /// Data points falling outside the whisker range.
    pub outliers: Vec<f64>,
}
30
/// Computes the linear-interpolation percentile of a sorted slice.
///
/// Uses the same interpolation scheme as NumPy's default (`linear`): the
/// fractional rank `p * (len - 1)` is located between two neighbouring
/// elements and the result is their weighted average.
///
/// `sorted` must be in ascending order; this is not checked.
///
/// `p` is expected to be in the range [0.0, 1.0]. Values outside that range
/// are clamped to it, so a negative `p` yields the minimum and a `p` above
/// 1.0 yields the maximum instead of extrapolating past the data. A NaN `p`
/// yields NaN when the slice has more than one element.
///
/// # Panics
///
/// Panics if `sorted` is empty.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    assert!(!sorted.is_empty(), "percentile requires non-empty data");

    let last = sorted.len() - 1;
    // Clamping keeps the rank inside [0, last]; without it a negative `p`
    // would produce a negative interpolation weight and extrapolate below
    // the smallest element.
    let rank = p.clamp(0.0, 1.0) * last as f64;
    let lo = rank.floor() as usize;

    match sorted.get(lo + 1) {
        Some(&upper) => {
            let frac = rank - lo as f64;
            sorted[lo] * (1.0 - frac) + upper * frac
        }
        // `lo` is already the final element (p == 1.0 or a single-element
        // slice), so there is nothing to interpolate towards.
        None => sorted[last],
    }
}
50
51/// Computes box plot statistics for a single dataset.
52///
53/// The `whisker_factor` controls how far the whiskers extend beyond Q1 and Q3,
54/// as a multiple of the interquartile range (IQR). The standard value is 1.5.
55/// Points outside the whisker range are classified as outliers.
56pub fn compute_stats(data: &[f64], whisker_factor: f64) -> BoxStats {
57    let mut sorted: Vec<f64> = data.iter().copied().filter(|v| v.is_finite()).collect();
58    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
59
60    if sorted.is_empty() {
61        return BoxStats {
62            q1: 0.0,
63            median: 0.0,
64            q3: 0.0,
65            whisker_low: 0.0,
66            whisker_high: 0.0,
67            outliers: vec![],
68        };
69    }
70
71    let q1 = percentile(&sorted, 0.25);
72    let median = percentile(&sorted, 0.5);
73    let q3 = percentile(&sorted, 0.75);
74    let iqr = q3 - q1;
75
76    let fence_low = q1 - whisker_factor * iqr;
77    let fence_high = q3 + whisker_factor * iqr;
78
79    // Whisker endpoints are the most extreme data points within the fences.
80    let whisker_low = sorted.iter().copied().find(|&v| v >= fence_low).unwrap_or(q1);
81    let whisker_high = sorted.iter().rev().copied().find(|&v| v <= fence_high).unwrap_or(q3);
82
83    let outliers: Vec<f64> = sorted.iter().copied().filter(|&v| v < whisker_low || v > whisker_high).collect();
84
85    BoxStats {
86        q1,
87        median,
88        q3,
89        whisker_low,
90        whisker_high,
91        outliers,
92    }
93}
94
95impl BoxPlotArtist {
96    /// Sets the box fill color.
97    ///
98    /// Applies the given [`Color`] to every box rendered by this artist.
99    pub fn color(&mut self, color: Color) -> &mut Self {
100        self.color = color;
101        self
102    }
103
104    /// Sets the legend label.
105    ///
106    /// When a legend is displayed on the figure, this label will appear
107    /// next to the color swatch for this box plot.
108    pub fn label(&mut self, label: &str) -> &mut Self {
109        self.label = Some(label.to_string());
110        self
111    }
112
113    /// Sets the opacity.
114    ///
115    /// The value is clamped to the range [0.0, 1.0], where 0.0 is fully
116    /// transparent and 1.0 is fully opaque.
117    pub fn alpha(&mut self, alpha: f64) -> &mut Self {
118        self.alpha = alpha.clamp(0.0, 1.0);
119        self
120    }
121
122    /// Sets the box width as a fraction of the category spacing.
123    ///
124    /// Smaller values produce thinner boxes with more whitespace between them.
125    /// The value is clamped to [0.1, 1.0].
126    pub fn width(&mut self, width: f64) -> &mut Self {
127        self.box_width = width.clamp(0.1, 1.0);
128        self
129    }
130
131    /// Controls whether outlier points are drawn.
132    ///
133    /// When `true` (the default), data points beyond the whiskers are rendered
134    /// as individual dots. When `false`, outliers are hidden.
135    pub fn show_outliers(&mut self, show: bool) -> &mut Self {
136        self.show_outliers = show;
137        self
138    }
139
140    /// Sets the whisker extent factor.
141    ///
142    /// Whiskers extend from Q1 and Q3 by this factor multiplied by the
143    /// interquartile range (IQR). The standard value is 1.5. Changing this
144    /// value recomputes the box statistics from the stored raw data.
145    pub fn whisker_factor(&mut self, factor: f64) -> &mut Self {
146        self.whisker_iq_factor = factor;
147        // Recompute stats with the new factor.
148        self.stats = self
149            .raw_data
150            .iter()
151            .map(|d| compute_stats(d, factor))
152            .collect();
153        self
154    }
155}
156
157// ---------------------------------------------------------------------------
158// Tests
159// ---------------------------------------------------------------------------
160
#[cfg(test)]
mod tests {
    use super::*;

    // -- percentile ---------------------------------------------------------

    #[test]
    fn percentile_single_value() {
        assert!((percentile(&[5.0], 0.5) - 5.0).abs() < f64::EPSILON);
    }

    #[test]
    fn percentile_two_values() {
        let data = [2.0, 8.0];
        assert!((percentile(&data, 0.0) - 2.0).abs() < f64::EPSILON);
        assert!((percentile(&data, 1.0) - 8.0).abs() < f64::EPSILON);
        assert!((percentile(&data, 0.5) - 5.0).abs() < f64::EPSILON);
    }

    #[test]
    fn percentile_interpolation() {
        let data = [1.0, 2.0, 3.0, 4.0];
        assert!((percentile(&data, 0.25) - 1.75).abs() < 1e-10);
        assert!((percentile(&data, 0.5) - 2.5).abs() < 1e-10);
        assert!((percentile(&data, 0.75) - 3.25).abs() < 1e-10);
    }

    // -- compute_stats ------------------------------------------------------

    #[test]
    fn compute_stats_basic() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let stats = compute_stats(&data, 1.5);
        assert!((stats.median - 5.5).abs() < 1e-10);
        assert!((stats.q1 - 3.25).abs() < 1e-10);
        assert!((stats.q3 - 7.75).abs() < 1e-10);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_with_outliers() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];
        let stats = compute_stats(&data, 1.5);
        assert!(!stats.outliers.is_empty());
        assert!(stats.outliers.contains(&100.0));
    }

    #[test]
    fn compute_stats_empty_data() {
        let stats = compute_stats(&[], 1.5);
        assert!((stats.q1 - 0.0).abs() < f64::EPSILON);
        assert!((stats.median - 0.0).abs() < f64::EPSILON);
        assert!((stats.q3 - 0.0).abs() < f64::EPSILON);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_single_value() {
        let stats = compute_stats(&[42.0], 1.5);
        assert!((stats.q1 - 42.0).abs() < f64::EPSILON);
        assert!((stats.median - 42.0).abs() < f64::EPSILON);
        assert!((stats.q3 - 42.0).abs() < f64::EPSILON);
        assert!(stats.outliers.is_empty());
    }

    #[test]
    fn compute_stats_two_values() {
        let stats = compute_stats(&[3.0, 7.0], 1.5);
        assert!((stats.median - 5.0).abs() < 1e-10);
        assert!((stats.q1 - 4.0).abs() < 1e-10);
        assert!((stats.q3 - 6.0).abs() < 1e-10);
    }

    #[test]
    fn compute_stats_nan_filtered() {
        let data = vec![f64::NAN, 1.0, 2.0, 3.0, f64::NAN];
        let stats = compute_stats(&data, 1.5);
        assert!((stats.median - 2.0).abs() < 1e-10);
    }

    #[test]
    fn compute_stats_whisker_endpoints() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let stats = compute_stats(&data, 1.5);
        assert!((stats.whisker_low - 1.0).abs() < 1e-10);
        assert!((stats.whisker_high - 10.0).abs() < 1e-10);
    }

    #[test]
    fn compute_stats_custom_whisker_factor() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];
        let stats_narrow = compute_stats(&data, 0.5);
        let stats_wide = compute_stats(&data, 3.0);
        assert!(stats_narrow.outliers.len() >= stats_wide.outliers.len());
    }

    // -- builder methods ----------------------------------------------------

    #[test]
    fn builder_color() {
        let mut artist = sample_boxplot_artist();
        artist.color(Color::TAB_RED);
        assert_eq!(artist.color, Color::TAB_RED);
    }

    #[test]
    fn builder_label() {
        let mut artist = sample_boxplot_artist();
        artist.label("my boxplot");
        assert_eq!(artist.label.as_deref(), Some("my boxplot"));
    }

    #[test]
    fn builder_alpha() {
        let mut artist = sample_boxplot_artist();
        artist.alpha(0.5);
        assert!((artist.alpha - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn builder_alpha_clamped() {
        let mut artist = sample_boxplot_artist();
        artist.alpha(2.0);
        assert!((artist.alpha - 1.0).abs() < f64::EPSILON);
        artist.alpha(-1.0);
        assert!((artist.alpha - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn builder_width() {
        let mut artist = sample_boxplot_artist();
        artist.width(0.5);
        assert!((artist.box_width - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn builder_width_clamped() {
        let mut artist = sample_boxplot_artist();
        artist.width(0.01);
        assert!((artist.box_width - 0.1).abs() < f64::EPSILON);
        artist.width(5.0);
        assert!((artist.box_width - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn builder_show_outliers() {
        let mut artist = sample_boxplot_artist();
        artist.show_outliers(false);
        assert!(!artist.show_outliers);
    }

    #[test]
    fn builder_whisker_factor_recomputes() {
        let mut artist = sample_boxplot_artist();
        // With the initial factor of 1.5 every sample point lies within the
        // fences, so the whiskers span the full data range.
        assert!(artist.stats[0].outliers.is_empty());
        assert!((artist.stats[0].whisker_low - 1.0).abs() < 1e-10);
        assert!((artist.stats[0].whisker_high - 10.0).abs() < 1e-10);

        // Q1 = 3.25, Q3 = 7.75, IQR = 4.5, so a factor of 0.1 puts the fences
        // near 2.8 and 8.2. The whiskers must shrink to 3 and 8, and the four
        // remaining points must become outliers.
        artist.whisker_factor(0.1);
        assert!((artist.whisker_iq_factor - 0.1).abs() < f64::EPSILON);
        assert!((artist.stats[0].whisker_low - 3.0).abs() < 1e-10);
        assert!((artist.stats[0].whisker_high - 8.0).abs() < 1e-10);
        assert_eq!(artist.stats[0].outliers, vec![1.0, 2.0, 9.0, 10.0]);
    }

    #[test]
    fn builder_methods_chain() {
        let mut artist = sample_boxplot_artist();
        artist
            .color(Color::TAB_RED)
            .label("chained")
            .alpha(0.25)
            .width(0.75)
            .show_outliers(false);
        assert_eq!(artist.color, Color::TAB_RED);
        assert_eq!(artist.label.as_deref(), Some("chained"));
        assert!((artist.alpha - 0.25).abs() < f64::EPSILON);
        assert!((artist.box_width - 0.75).abs() < f64::EPSILON);
        assert!(!artist.show_outliers);
    }

    // -- data bounds --------------------------------------------------------

    #[test]
    fn data_bounds_single_group() {
        let artist = sample_boxplot_artist();
        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
        assert!((xmin - (-0.5)).abs() < f64::EPSILON);
        assert!((xmax - 0.5).abs() < f64::EPSILON);
        assert!(ymin <= artist.stats[0].whisker_low);
        assert!(ymax >= artist.stats[0].whisker_high);
    }

    #[test]
    fn data_bounds_multiple_groups() {
        let raw = vec![
            vec![1.0, 2.0, 3.0],
            vec![10.0, 20.0, 30.0],
        ];
        let stats: Vec<BoxStats> = raw.iter().map(|d| compute_stats(d, 1.5)).collect();
        let artist = BoxPlotArtist {
            stats,
            labels: vec!["A".to_string(), "B".to_string()],
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            box_width: 0.5,
            show_outliers: true,
            whisker_iq_factor: 1.5,
            raw_data: raw,
        };
        let (xmin, xmax, ymin, ymax) = artist.data_bounds();
        assert!((xmin - (-0.5)).abs() < f64::EPSILON);
        assert!((xmax - 1.5).abs() < f64::EPSILON);
        assert!(ymin <= 1.0);
        assert!(ymax >= 30.0);
    }

    /// Helper to create a sample single-group `BoxPlotArtist` (values 1..=10,
    /// whisker factor 1.5) for the builder and bounds tests.
    fn sample_boxplot_artist() -> BoxPlotArtist {
        let raw = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]];
        let stats = vec![compute_stats(&raw[0], 1.5)];
        BoxPlotArtist {
            stats,
            labels: vec!["Group 1".to_string()],
            color: Color::TAB_BLUE,
            label: None,
            alpha: 1.0,
            box_width: 0.5,
            show_outliers: true,
            whisker_iq_factor: 1.5,
            raw_data: raw,
        }
    }
}