// plotlars_core/plots/boxplot.rs

1use bon::bon;
2
3use polars::frame::DataFrame;
4
5use crate::{
6    components::{Axis, FacetConfig, Legend, Orientation, Rgb, Text, DEFAULT_PLOTLY_COLORS},
7    ir::data::ColumnData,
8    ir::layout::LayoutIR,
9    ir::marker::MarkerIR,
10    ir::trace::{BoxPlotIR, TraceIR},
11};
12
13/// A structure representing a box plot.
14///
15/// The `BoxPlot` struct facilitates the creation and customization of box plots with various options
16/// for data selection, layout configuration, and aesthetic adjustments. It supports both horizontal
17/// and vertical orientations, grouping of data, display of individual data points with jitter and offset,
18/// opacity settings, and customizable markers and colors.
19///
20/// # Backend Support
21///
22/// | Backend | Supported |
23/// |---------|-----------|
24/// | Plotly  | Yes       |
25/// | Plotters| Yes       |
26///
27/// # Arguments
28///
29/// * `data` - A reference to the `DataFrame` containing the data to be plotted.
30/// * `labels` - A string slice specifying the column name to be used for the x-axis (independent variable).
31/// * `values` - A string slice specifying the column name to be used for the y-axis (dependent variable).
32/// * `orientation` - An optional `Orientation` enum specifying whether the plot should be horizontal or vertical.
33/// * `group` - An optional string slice specifying the column name to be used for grouping data points.
34/// * `sort_groups_by` - Optional comparator `fn(&str, &str) -> std::cmp::Ordering` to control group ordering. Groups are sorted lexically by default.
35/// * `facet` - An optional string slice specifying the column name to be used for creating facets (small multiples).
36/// * `facet_config` - An optional reference to a `FacetConfig` struct for customizing facet layout and behavior.
37/// * `box_points` - An optional boolean indicating whether individual data points should be plotted along with the box plot.
38/// * `point_offset` - An optional `f64` value specifying the horizontal offset for individual data points when `box_points` is enabled.
39/// * `jitter` - An optional `f64` value indicating the amount of jitter (random noise) to apply to individual data points for visibility.
40/// * `opacity` - An optional `f64` value specifying the opacity of the plot markers (range: 0.0 to 1.0).
41/// * `color` - An optional `Rgb` value specifying the color of the markers to be used for the plot. This is used when `group` is not specified.
42/// * `colors` - An optional vector of `Rgb` values specifying the colors to be used for the plot. This is used when `group` is specified to differentiate between groups.
43/// * `plot_title` - An optional `Text` struct specifying the title of the plot.
44/// * `x_title` - An optional `Text` struct specifying the title of the x-axis.
45/// * `y_title` - An optional `Text` struct specifying the title of the y-axis.
46/// * `legend_title` - An optional `Text` struct specifying the title of the legend.
47/// * `x_axis` - An optional reference to an `Axis` struct for customizing the x-axis.
48/// * `y_axis` - An optional reference to an `Axis` struct for customizing the y-axis.
49/// * `legend` - An optional reference to a `Legend` struct for customizing the legend of the plot (e.g., positioning, font, etc.).
50///
51/// # Example
52///
53/// ```rust
54/// use plotlars::{Axis, BoxPlot, Legend, Orientation, Plot, Rgb, Text};
55/// use polars::prelude::*;
56///
57/// let dataset = LazyCsvReader::new(PlRefPath::new("data/penguins.csv"))
58///     .finish()
59///     .unwrap()
60///     .select([
61///         col("species"),
62///         col("sex").alias("gender"),
63///         col("flipper_length_mm").cast(DataType::Int16),
64///         col("body_mass_g").cast(DataType::Int16),
65///     ])
66///     .collect()
67///     .unwrap();
68///
69/// BoxPlot::builder()
70///     .data(&dataset)
71///     .labels("species")
72///     .values("body_mass_g")
73///     .orientation(Orientation::Vertical)
74///     .group("gender")
75///     .box_points(true)
76///     .point_offset(-1.5)
77///     .jitter(0.01)
78///     .opacity(0.1)
79///     .colors(vec![
80///         Rgb(0, 191, 255),
81///         Rgb(57, 255, 20),
82///         Rgb(255, 105, 180),
83///     ])
84///     .plot_title(
85///         Text::from("Box Plot")
86///             .font("Arial")
87///             .size(18)
88///     )
89///     .x_title(
90///         Text::from("species")
91///             .font("Arial")
92///             .size(15)
93///     )
94///     .y_title(
95///         Text::from("body mass (g)")
96///             .font("Arial")
97///             .size(15)
98///     )
99///     .legend_title(
100///         Text::from("gender")
101///             .font("Arial")
102///             .size(15)
103///     )
104///     .y_axis(
105///         &Axis::new()
106///             .value_thousands(true)
107///     )
108///     .legend(
109///         &Legend::new()
110///             .border_width(1)
111///             .x(0.9)
112///     )
113///     .build()
114///     .plot();
115/// ```
116///
117/// ![Example](https://imgur.com/jdA3g9r.png)
118#[derive(Clone)]
119#[allow(dead_code)]
120pub struct BoxPlot {
121    traces: Vec<TraceIR>,
122    layout: LayoutIR,
123}
124
125#[bon]
126impl BoxPlot {
127    #[builder(on(String, into), on(Text, into))]
128    pub fn new(
129        data: &DataFrame,
130        labels: &str,
131        values: &str,
132        orientation: Option<Orientation>,
133        group: Option<&str>,
134        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
135        facet: Option<&str>,
136        facet_config: Option<&FacetConfig>,
137        box_points: Option<bool>,
138        point_offset: Option<f64>,
139        jitter: Option<f64>,
140        opacity: Option<f64>,
141        color: Option<Rgb>,
142        colors: Option<Vec<Rgb>>,
143        plot_title: Option<Text>,
144        x_title: Option<Text>,
145        y_title: Option<Text>,
146        legend_title: Option<Text>,
147        x_axis: Option<&Axis>,
148        y_axis: Option<&Axis>,
149        legend: Option<&Legend>,
150    ) -> Self {
151        let grid = facet.map(|facet_column| {
152            let config = facet_config.cloned().unwrap_or_default();
153            let facet_categories =
154                crate::data::get_unique_groups(data, facet_column, config.sorter);
155            let n_facets = facet_categories.len();
156            let (ncols, nrows) =
157                crate::faceting::calculate_grid_dimensions(n_facets, config.cols, config.rows);
158            crate::ir::facet::GridSpec {
159                kind: crate::ir::facet::FacetKind::Axis,
160                rows: nrows,
161                cols: ncols,
162                h_gap: config.h_gap,
163                v_gap: config.v_gap,
164                scales: config.scales.clone(),
165                n_facets,
166                facet_categories,
167                title_style: config.title_style.clone(),
168                x_title: x_title.clone(),
169                y_title: y_title.clone(),
170                x_axis: x_axis.cloned(),
171                y_axis: y_axis.cloned(),
172                legend_title: legend_title.clone(),
173                legend: legend.cloned(),
174            }
175        });
176
177        let layout = LayoutIR {
178            title: plot_title.clone(),
179            x_title: if grid.is_some() {
180                None
181            } else {
182                x_title.clone()
183            },
184            y_title: if grid.is_some() {
185                None
186            } else {
187                y_title.clone()
188            },
189            y2_title: None,
190            z_title: None,
191            legend_title: if grid.is_some() {
192                None
193            } else {
194                legend_title.clone()
195            },
196            legend: if grid.is_some() {
197                None
198            } else {
199                legend.cloned()
200            },
201            dimensions: None,
202            bar_mode: None,
203            box_mode: if group.is_some() {
204                Some(crate::ir::layout::BoxModeIR::Group)
205            } else {
206                None
207            },
208            box_gap: None,
209            margin_bottom: None,
210            axes_2d: if grid.is_some() {
211                None
212            } else {
213                Some(crate::ir::layout::Axes2dIR {
214                    x_axis: x_axis.cloned(),
215                    y_axis: y_axis.cloned(),
216                    y2_axis: None,
217                })
218            },
219            scene_3d: None,
220            polar: None,
221            mapbox: None,
222            grid,
223            annotations: vec![],
224        };
225
226        let traces = match facet {
227            Some(facet_column) => {
228                let config = facet_config.cloned().unwrap_or_default();
229                Self::create_ir_traces_faceted(
230                    data,
231                    labels,
232                    values,
233                    orientation.clone(),
234                    group,
235                    sort_groups_by,
236                    facet_column,
237                    &config,
238                    box_points,
239                    point_offset,
240                    jitter,
241                    opacity,
242                    color,
243                    colors.clone(),
244                )
245            }
246            None => Self::create_ir_traces(
247                data,
248                labels,
249                values,
250                orientation,
251                group,
252                sort_groups_by,
253                box_points,
254                point_offset,
255                jitter,
256                opacity,
257                color,
258                colors,
259            ),
260        };
261
262        Self { traces, layout }
263    }
264}
265
266#[bon]
267impl BoxPlot {
268    #[builder(
269        start_fn = try_builder,
270        finish_fn = try_build,
271        builder_type = BoxPlotTryBuilder,
272        on(String, into),
273        on(Text, into),
274    )]
275    pub fn try_new(
276        data: &DataFrame,
277        labels: &str,
278        values: &str,
279        orientation: Option<Orientation>,
280        group: Option<&str>,
281        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
282        facet: Option<&str>,
283        facet_config: Option<&FacetConfig>,
284        box_points: Option<bool>,
285        point_offset: Option<f64>,
286        jitter: Option<f64>,
287        opacity: Option<f64>,
288        color: Option<Rgb>,
289        colors: Option<Vec<Rgb>>,
290        plot_title: Option<Text>,
291        x_title: Option<Text>,
292        y_title: Option<Text>,
293        legend_title: Option<Text>,
294        x_axis: Option<&Axis>,
295        y_axis: Option<&Axis>,
296        legend: Option<&Legend>,
297    ) -> Result<Self, crate::io::PlotlarsError> {
298        std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
299            Self::__orig_new(
300                data,
301                labels,
302                values,
303                orientation,
304                group,
305                sort_groups_by,
306                facet,
307                facet_config,
308                box_points,
309                point_offset,
310                jitter,
311                opacity,
312                color,
313                colors,
314                plot_title,
315                x_title,
316                y_title,
317                legend_title,
318                x_axis,
319                y_axis,
320                legend,
321            )
322        }))
323        .map_err(|panic| {
324            let msg = panic
325                .downcast_ref::<String>()
326                .cloned()
327                .or_else(|| panic.downcast_ref::<&str>().map(|s| s.to_string()))
328                .unwrap_or_else(|| "unknown error".to_string());
329            crate::io::PlotlarsError::PlotBuild { message: msg }
330        })
331    }
332}
333
334impl BoxPlot {
335    #[allow(clippy::too_many_arguments)]
336    fn create_ir_traces(
337        data: &DataFrame,
338        labels: &str,
339        values: &str,
340        orientation: Option<Orientation>,
341        group: Option<&str>,
342        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
343        box_points: Option<bool>,
344        point_offset: Option<f64>,
345        jitter: Option<f64>,
346        opacity: Option<f64>,
347        color: Option<Rgb>,
348        colors: Option<Vec<Rgb>>,
349    ) -> Vec<TraceIR> {
350        let mut traces = Vec::new();
351
352        match group {
353            Some(group_col) => {
354                let groups = crate::data::get_unique_groups(data, group_col, sort_groups_by);
355
356                for (i, group_name) in groups.iter().enumerate() {
357                    let subset = crate::data::filter_data_by_group(data, group_col, group_name);
358
359                    let marker_ir = MarkerIR {
360                        opacity,
361                        size: None,
362                        color: Self::resolve_color(i, color, colors.clone()),
363                        shape: None,
364                    };
365
366                    traces.push(TraceIR::BoxPlot(BoxPlotIR {
367                        labels: ColumnData::String(crate::data::get_string_column(&subset, labels)),
368                        values: ColumnData::Numeric(crate::data::get_numeric_column(
369                            &subset, values,
370                        )),
371                        name: Some(group_name.to_string()),
372                        orientation: orientation.clone(),
373                        marker: Some(marker_ir),
374                        box_points,
375                        point_offset,
376                        jitter,
377                        show_legend: None,
378                        legend_group: None,
379                        subplot_ref: None,
380                    }));
381                }
382            }
383            None => {
384                let marker_ir = MarkerIR {
385                    opacity,
386                    size: None,
387                    color: Self::resolve_color(0, color, colors),
388                    shape: None,
389                };
390
391                traces.push(TraceIR::BoxPlot(BoxPlotIR {
392                    labels: ColumnData::String(crate::data::get_string_column(data, labels)),
393                    values: ColumnData::Numeric(crate::data::get_numeric_column(data, values)),
394                    name: None,
395                    orientation: orientation.clone(),
396                    marker: Some(marker_ir),
397                    box_points,
398                    point_offset,
399                    jitter,
400                    show_legend: None,
401                    legend_group: None,
402                    subplot_ref: None,
403                }));
404            }
405        }
406
407        traces
408    }
409
410    #[allow(clippy::too_many_arguments)]
411    fn create_ir_traces_faceted(
412        data: &DataFrame,
413        labels: &str,
414        values: &str,
415        orientation: Option<Orientation>,
416        group: Option<&str>,
417        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
418        facet_column: &str,
419        config: &FacetConfig,
420        box_points: Option<bool>,
421        point_offset: Option<f64>,
422        jitter: Option<f64>,
423        opacity: Option<f64>,
424        color: Option<Rgb>,
425        colors: Option<Vec<Rgb>>,
426    ) -> Vec<TraceIR> {
427        const MAX_FACETS: usize = 8;
428
429        let facet_categories = crate::data::get_unique_groups(data, facet_column, config.sorter);
430
431        if facet_categories.len() > MAX_FACETS {
432            panic!(
433                "Facet column '{}' has {} unique values, but plotly.rs supports maximum {} subplots",
434                facet_column,
435                facet_categories.len(),
436                MAX_FACETS
437            );
438        }
439
440        if let Some(ref color_vec) = colors {
441            if group.is_none() {
442                let color_count = color_vec.len();
443                let facet_count = facet_categories.len();
444                if color_count != facet_count {
445                    panic!(
446                        "When using colors with facet (without group), colors.len() must equal number of facets. \
447                         Expected {} colors for {} facets, but got {} colors. \
448                         Each facet must be assigned exactly one color.",
449                        facet_count, facet_count, color_count
450                    );
451                }
452            } else if let Some(group_col) = group {
453                let groups = crate::data::get_unique_groups(data, group_col, sort_groups_by);
454                let color_count = color_vec.len();
455                let group_count = groups.len();
456                if color_count < group_count {
457                    panic!(
458                        "When using colors with group, colors.len() must be >= number of groups. \
459                         Need at least {} colors for {} groups, but got {} colors",
460                        group_count, group_count, color_count
461                    );
462                }
463            }
464        }
465
466        let global_group_indices: std::collections::HashMap<String, usize> =
467            if let Some(group_col) = group {
468                let global_groups = crate::data::get_unique_groups(data, group_col, sort_groups_by);
469                global_groups
470                    .into_iter()
471                    .enumerate()
472                    .map(|(idx, group_name)| (group_name, idx))
473                    .collect()
474            } else {
475                std::collections::HashMap::new()
476            };
477
478        let colors = if group.is_some() && colors.is_none() {
479            Some(DEFAULT_PLOTLY_COLORS.to_vec())
480        } else {
481            colors
482        };
483
484        let mut traces = Vec::new();
485
486        for (facet_idx, facet_value) in facet_categories.iter().enumerate() {
487            let facet_data = crate::data::filter_data_by_group(data, facet_column, facet_value);
488
489            let subplot_ref = format!(
490                "{}{}",
491                crate::faceting::get_axis_reference(facet_idx, "x"),
492                crate::faceting::get_axis_reference(facet_idx, "y")
493            );
494
495            match group {
496                Some(group_col) => {
497                    let groups =
498                        crate::data::get_unique_groups(&facet_data, group_col, sort_groups_by);
499
500                    for group_val in groups.iter() {
501                        let group_data =
502                            crate::data::filter_data_by_group(&facet_data, group_col, group_val);
503
504                        let global_idx = global_group_indices.get(group_val).copied().unwrap_or(0);
505
506                        let marker_ir = MarkerIR {
507                            opacity,
508                            size: None,
509                            color: Self::resolve_color(global_idx, color, colors.clone()),
510                            shape: None,
511                        };
512
513                        traces.push(TraceIR::BoxPlot(BoxPlotIR {
514                            labels: ColumnData::String(crate::data::get_string_column(
515                                &group_data,
516                                labels,
517                            )),
518                            values: ColumnData::Numeric(crate::data::get_numeric_column(
519                                &group_data,
520                                values,
521                            )),
522                            name: Some(group_val.to_string()),
523                            orientation: orientation.clone(),
524                            marker: Some(marker_ir),
525                            box_points,
526                            point_offset,
527                            jitter,
528                            show_legend: Some(facet_idx == 0),
529                            legend_group: Some(group_val.to_string()),
530                            subplot_ref: Some(subplot_ref.clone()),
531                        }));
532                    }
533                }
534                None => {
535                    let marker_ir = MarkerIR {
536                        opacity,
537                        size: None,
538                        color: Self::resolve_color(facet_idx, color, colors.clone()),
539                        shape: None,
540                    };
541
542                    traces.push(TraceIR::BoxPlot(BoxPlotIR {
543                        labels: ColumnData::String(crate::data::get_string_column(
544                            &facet_data,
545                            labels,
546                        )),
547                        values: ColumnData::Numeric(crate::data::get_numeric_column(
548                            &facet_data,
549                            values,
550                        )),
551                        name: None,
552                        orientation: orientation.clone(),
553                        marker: Some(marker_ir),
554                        box_points,
555                        point_offset,
556                        jitter,
557                        show_legend: Some(false),
558                        legend_group: None,
559                        subplot_ref: Some(subplot_ref.clone()),
560                    }));
561                }
562            }
563        }
564
565        traces
566    }
567
568    fn resolve_color(index: usize, color: Option<Rgb>, colors: Option<Vec<Rgb>>) -> Option<Rgb> {
569        if let Some(c) = color {
570            return Some(c);
571        }
572        if let Some(ref cs) = colors {
573            return cs.get(index).copied();
574        }
575        None
576    }
577}
578
579impl crate::Plot for BoxPlot {
580    fn ir_traces(&self) -> &[TraceIR] {
581        &self.traces
582    }
583
584    fn ir_layout(&self) -> &LayoutIR {
585        &self.layout
586    }
587}
588
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Plot;
    use polars::prelude::*;

    /// Asserts that `actual` holds a color with exactly the given channels.
    fn assert_rgb(actual: Option<Rgb>, r: u8, g: u8, b: u8) {
        let Rgb(red, green, blue) = actual.expect("expected Some(Rgb)");
        assert_eq!((red, green, blue), (r, g, b));
    }

    /// An ungrouped, unfaceted plot yields exactly one box trace.
    #[test]
    fn test_basic_one_trace() {
        let frame = df![
            "labels" => ["a", "b", "c"],
            "values" => [1.0, 2.0, 3.0]
        ]
        .unwrap();

        let plot = BoxPlot::builder()
            .data(&frame)
            .labels("labels")
            .values("values")
            .build();

        let traces = plot.ir_traces();
        assert_eq!(traces.len(), 1);
        assert!(matches!(traces[0], TraceIR::BoxPlot(_)));
    }

    /// Grouping by a two-valued column yields one trace per group.
    #[test]
    fn test_with_group() {
        let frame = df![
            "labels" => ["a", "b", "a", "b"],
            "values" => [1.0, 2.0, 3.0, 4.0],
            "g" => ["x", "x", "y", "y"]
        ]
        .unwrap();

        let plot = BoxPlot::builder()
            .data(&frame)
            .labels("labels")
            .values("values")
            .group("g")
            .build();

        assert_eq!(plot.ir_traces().len(), 2);
    }

    /// A single `color` overrides the `colors` palette.
    #[test]
    fn test_resolve_color_singular_priority() {
        let single = Some(Rgb(255, 0, 0));
        let palette = Some(vec![Rgb(0, 0, 255)]);
        assert_rgb(BoxPlot::resolve_color(0, single, palette), 255, 0, 0);
    }

    /// With neither `color` nor `colors`, no color is resolved.
    #[test]
    fn test_resolve_color_both_none() {
        assert!(BoxPlot::resolve_color(0, None, None).is_none());
    }

    /// A non-faceted plot stores its 2D axes on the layout.
    #[test]
    fn test_layout_has_axes() {
        let frame = df![
            "labels" => ["a", "b"],
            "values" => [1.0, 2.0]
        ]
        .unwrap();

        let plot = BoxPlot::builder()
            .data(&frame)
            .labels("labels")
            .values("values")
            .build();

        assert!(plot.ir_layout().axes_2d.is_some());
    }
}
659}