Skip to main content

ggplot_rs/stat/
boxplot.rs

1use crate::aes::Aesthetic;
2use crate::data::{DataFrame, Value};
3use crate::scale::ScaleSet;
4
5use super::Stat;
6
/// Computes boxplot statistics: quartiles, whiskers, outliers.
///
/// A stateless unit struct. Its [`Stat`] implementation reduces the `y`
/// values of one group to a single summary row containing `ymin`, `lower`,
/// `middle`, `upper`, `ymax`, the notch bounds, and (only when outliers
/// exist) an `outliers` column.
pub struct StatBoxplot;
9
10impl Stat for StatBoxplot {
11    fn compute_group(&self, data: &DataFrame, _scales: &ScaleSet) -> DataFrame {
12        let y_col = match data.column("y") {
13            Some(c) => c,
14            None => return DataFrame::new(),
15        };
16
17        let mut values: Vec<f64> = y_col.iter().filter_map(|v| v.as_f64()).collect();
18        if values.is_empty() {
19            return DataFrame::new();
20        }
21
22        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
23
24        let n = values.len();
25        let q1 = percentile(&values, 25.0);
26        let median = percentile(&values, 50.0);
27        let q3 = percentile(&values, 75.0);
28        let iqr = q3 - q1;
29
30        let lower_fence = q1 - 1.5 * iqr;
31        let upper_fence = q3 + 1.5 * iqr;
32
33        // Whiskers extend to most extreme non-outlier
34        let ymin = values
35            .iter()
36            .find(|&&v| v >= lower_fence)
37            .copied()
38            .unwrap_or(q1);
39        let ymax = values
40            .iter()
41            .rev()
42            .find(|&&v| v <= upper_fence)
43            .copied()
44            .unwrap_or(q3);
45
46        // Outliers
47        let outliers: Vec<f64> = values
48            .iter()
49            .filter(|&&v| v < lower_fence || v > upper_fence)
50            .copied()
51            .collect();
52
53        // Get x value (group identifier)
54        let x_val = data
55            .column("x")
56            .and_then(|c| c.first())
57            .cloned()
58            .unwrap_or(Value::Float(0.0));
59
60        let mut result = DataFrame::new();
61        result.add_column("x".to_string(), vec![x_val.clone()]);
62        result.add_column("ymin".to_string(), vec![Value::Float(ymin)]);
63        result.add_column("lower".to_string(), vec![Value::Float(q1)]);
64        result.add_column("middle".to_string(), vec![Value::Float(median)]);
65        result.add_column("upper".to_string(), vec![Value::Float(q3)]);
66        result.add_column("ymax".to_string(), vec![Value::Float(ymax)]);
67        result.add_column(
68            "notchupper".to_string(),
69            vec![Value::Float(median + 1.58 * iqr / (n as f64).sqrt())],
70        );
71        result.add_column(
72            "notchlower".to_string(),
73            vec![Value::Float(median - 1.58 * iqr / (n as f64).sqrt())],
74        );
75
76        // Store outliers as separate rows in a companion column
77        if !outliers.is_empty() {
78            // We'll encode outlier count; actual outlier drawing handled by geom
79            let outlier_str = outliers
80                .iter()
81                .map(|v| v.to_string())
82                .collect::<Vec<_>>()
83                .join(",");
84            result.add_column("outliers".to_string(), vec![Value::Str(outlier_str)]);
85        }
86
87        // Carry over fill/color
88        for col_name in &["fill", "color"] {
89            if let Some(col) = data.column(col_name) {
90                if let Some(first) = col.first() {
91                    result.add_column(col_name.to_string(), vec![first.clone()]);
92                }
93            }
94        }
95
96        result
97    }
98
99    fn required_aes(&self) -> Vec<Aesthetic> {
100        vec![Aesthetic::X, Aesthetic::Y]
101    }
102
103    fn name(&self) -> &str {
104        "boxplot"
105    }
106}
107
/// Returns the `p`-th percentile of `sorted` using linear interpolation
/// between the two nearest ranks.
///
/// * `sorted` – values in ascending order; the function does not sort them.
/// * `p` – percentile on a 0–100 scale. Values outside that range are clamped
///   (so `p > 100` yields the last element instead of indexing past the end),
///   and a NaN `p` is treated as 0.
///
/// Returns `0.0` for an empty slice and the sole element for a slice of
/// length one.
fn percentile(sorted: &[f64], p: f64) -> f64 {
    let last = match sorted.len().checked_sub(1) {
        Some(last) => last,
        None => return 0.0,
    };

    // `f64::max`/`min` ignore a NaN operand, so this maps NaN to 0.0 and
    // pins everything else into [0, 1]; in-range inputs pass through as-is.
    let fraction = (p / 100.0).max(0.0).min(1.0);

    // Fractional rank into the slice; 0 for a single-element slice.
    let rank = fraction * last as f64;
    let lo = rank.floor() as usize;
    // `fraction <= 1` already bounds this; the `min` keeps indexing provably safe.
    let hi = (rank.ceil() as usize).min(last);

    if lo == hi {
        return sorted[lo];
    }

    let weight = rank - lo as f64;
    sorted[lo] * (1.0 - weight) + sorted[hi] * weight
}