plotlars/plots/
boxplot.rs

1use bon::bon;
2
3use plotly::{
4    box_plot::BoxPoints, common::Marker as MarkerPlotly, layout::BoxMode, BoxPlot as BoxPlotly,
5    Layout as LayoutPlotly, Trace,
6};
7
8use polars::frame::DataFrame;
9use serde::Serialize;
10
11use crate::{
12    common::{Layout, Marker, PlotHelper, Polar},
13    components::{Axis, Legend, Orientation, Rgb, Text},
14};
15
/// A structure representing a box plot.
///
/// The `BoxPlot` struct facilitates the creation and customization of box plots with various options
/// for data selection, layout configuration, and aesthetic adjustments. It supports both horizontal
/// and vertical orientations, grouping of data, display of individual data points with jitter and offset,
/// opacity settings, and customizable markers and colors.
///
/// # Arguments
///
/// * `data` - A reference to the `DataFrame` containing the data to be plotted.
/// * `labels` - A string slice specifying the column whose values label the boxes. It is drawn on the x-axis for vertical plots and on the y-axis for horizontal plots.
/// * `values` - A string slice specifying the column whose values are summarized by the boxes. It is drawn on the y-axis for vertical plots and on the x-axis for horizontal plots.
/// * `orientation` - An optional `Orientation` enum specifying whether the plot should be horizontal or vertical. Defaults to `Orientation::Vertical` when omitted.
/// * `group` - An optional string slice specifying the column name to be used for grouping data points. One trace is created per group.
/// * `sort_groups_by` - Optional comparator `fn(&str, &str) -> std::cmp::Ordering` to control group ordering. Groups are sorted lexically by default.
/// * `box_points` - An optional boolean: `true` plots all individual data points along with the boxes, `false` hides them. When omitted, the setting is left untouched and Plotly's own default applies.
/// * `point_offset` - An optional `f64` value specifying the position of the individual data points relative to their box (forwarded to the trace's `point_pos` setting).
/// * `jitter` - An optional `f64` value indicating the amount of jitter (random noise) to apply to individual data points for visibility.
/// * `opacity` - An optional `f64` value specifying the opacity of the plot markers (range: 0.0 to 1.0).
/// * `color` - An optional `Rgb` value specifying the color of the markers to be used for the plot. This is used when `group` is not specified.
/// * `colors` - An optional vector of `Rgb` values specifying the colors to be used for the plot. This is used when `group` is specified to differentiate between groups.
/// * `plot_title` - An optional `Text` struct specifying the title of the plot.
/// * `x_title` - An optional `Text` struct specifying the title of the x-axis.
/// * `y_title` - An optional `Text` struct specifying the title of the y-axis.
/// * `legend_title` - An optional `Text` struct specifying the title of the legend.
/// * `x_axis` - An optional reference to an `Axis` struct for customizing the x-axis.
/// * `y_axis` - An optional reference to an `Axis` struct for customizing the y-axis.
/// * `legend` - An optional reference to a `Legend` struct for customizing the legend of the plot (e.g., positioning, font, etc.).
///
/// # Example
///
/// ```rust
/// use polars::prelude::*;
/// use plotlars::{Axis, BoxPlot, Legend, Orientation, Plot, Rgb, Text};
///
/// let dataset = LazyCsvReader::new(PlPath::new("data/penguins.csv"))
///     .finish()
///     .unwrap()
///     .select([
///         col("species"),
///         col("sex").alias("gender"),
///         col("flipper_length_mm").cast(DataType::Int16),
///         col("body_mass_g").cast(DataType::Int16),
///     ])
///     .collect()
///     .unwrap();
///
/// BoxPlot::builder()
///     .data(&dataset)
///     .labels("species")
///     .values("body_mass_g")
///     .orientation(Orientation::Vertical)
///     .group("gender")
///     .box_points(true)
///     .point_offset(-1.5)
///     .jitter(0.01)
///     .opacity(0.1)
///     .colors(vec![
///         Rgb(0, 191, 255),
///         Rgb(57, 255, 20),
///         Rgb(255, 105, 180),
///     ])
///     .plot_title(
///         Text::from("Box Plot")
///             .font("Arial")
///             .size(18)
///     )
///     .x_title(
///         Text::from("species")
///             .font("Arial")
///             .size(15)
///     )
///     .y_title(
///         Text::from("body mass (g)")
///             .font("Arial")
///             .size(15)
///     )
///     .legend_title(
///         Text::from("gender")
///             .font("Arial")
///             .size(15)
///     )
///     .y_axis(
///         &Axis::new()
///             .value_thousands(true)
///     )
///     .legend(
///         &Legend::new()
///             .border_width(1)
///             .x(0.9)
///     )
///     .build()
///     .plot();
/// ```
///
/// ![Example](https://imgur.com/uj1LY90.png)
#[derive(Clone, Serialize)]
pub struct BoxPlot {
    /// The Plotly box traces: one per group when `group` is given, otherwise a single trace.
    traces: Vec<Box<dyn Trace + 'static>>,
    /// The Plotly layout (titles, axes, legend) with the box mode set to `Group`.
    layout: LayoutPlotly,
}
117
118#[bon]
119impl BoxPlot {
120    #[builder(on(String, into), on(Text, into))]
121    pub fn new(
122        data: &DataFrame,
123        labels: &str,
124        values: &str,
125        orientation: Option<Orientation>,
126        group: Option<&str>,
127        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
128        box_points: Option<bool>,
129        point_offset: Option<f64>,
130        jitter: Option<f64>,
131        opacity: Option<f64>,
132        color: Option<Rgb>,
133        colors: Option<Vec<Rgb>>,
134        plot_title: Option<Text>,
135        x_title: Option<Text>,
136        y_title: Option<Text>,
137        legend_title: Option<Text>,
138        x_axis: Option<&Axis>,
139        y_axis: Option<&Axis>,
140        legend: Option<&Legend>,
141    ) -> Self {
142        let z_title = None;
143        let z_axis = None;
144        let y2_title = None;
145        let y2_axis = None;
146
147        let mut layout = Self::create_layout(
148            plot_title,
149            x_title,
150            y_title,
151            y2_title,
152            z_title,
153            legend_title,
154            x_axis,
155            y_axis,
156            y2_axis,
157            z_axis,
158            legend,
159        );
160
161        layout = layout.box_mode(BoxMode::Group);
162
163        let traces = Self::create_traces(
164            data,
165            labels,
166            values,
167            orientation,
168            group,
169            sort_groups_by,
170            box_points,
171            point_offset,
172            jitter,
173            opacity,
174            color,
175            colors,
176        );
177
178        Self { traces, layout }
179    }
180
181    #[allow(clippy::too_many_arguments)]
182    fn create_traces(
183        data: &DataFrame,
184        labels: &str,
185        values: &str,
186        orientation: Option<Orientation>,
187        group: Option<&str>,
188        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
189        box_points: Option<bool>,
190        point_offset: Option<f64>,
191        jitter: Option<f64>,
192        opacity: Option<f64>,
193        color: Option<Rgb>,
194        colors: Option<Vec<Rgb>>,
195    ) -> Vec<Box<dyn Trace + 'static>> {
196        let mut traces: Vec<Box<dyn Trace + 'static>> = Vec::new();
197
198        let size = None;
199        let shape = None;
200        let shapes = None;
201
202        match group {
203            Some(group_col) => {
204                let groups = Self::get_unique_groups(data, group_col, sort_groups_by);
205
206                let groups = groups.iter().map(|s| s.as_str());
207
208                for (i, group) in groups.enumerate() {
209                    let marker = Self::create_marker(
210                        i,
211                        opacity,
212                        size,
213                        color,
214                        colors.clone(),
215                        shape,
216                        shapes.clone(),
217                    );
218
219                    let subset = Self::filter_data_by_group(data, group_col, group);
220
221                    let trace = Self::create_trace(
222                        &subset,
223                        labels,
224                        values,
225                        orientation.clone(),
226                        Some(group),
227                        box_points,
228                        point_offset,
229                        jitter,
230                        marker,
231                    );
232
233                    traces.push(trace);
234                }
235            }
236            None => {
237                let group = None;
238
239                let marker = Self::create_marker(
240                    0,
241                    opacity,
242                    size,
243                    color,
244                    colors.clone(),
245                    shape,
246                    shapes.clone(),
247                );
248
249                let trace = Self::create_trace(
250                    data,
251                    labels,
252                    values,
253                    orientation,
254                    group,
255                    box_points,
256                    point_offset,
257                    jitter,
258                    marker,
259                );
260
261                traces.push(trace);
262            }
263        }
264
265        traces
266    }
267
268    #[allow(clippy::too_many_arguments)]
269    fn create_trace(
270        data: &DataFrame,
271        labels: &str,
272        values: &str,
273        orientation: Option<Orientation>,
274        group_name: Option<&str>,
275        box_points: Option<bool>,
276        point_offset: Option<f64>,
277        jitter: Option<f64>,
278        marker: MarkerPlotly,
279    ) -> Box<dyn Trace + 'static> {
280        let category_data = Self::get_string_column(data, labels);
281        let value_data = Self::get_numeric_column(data, values);
282
283        let orientation = orientation.unwrap_or(Orientation::Vertical);
284
285        match orientation {
286            Orientation::Vertical => {
287                let mut trace = BoxPlotly::default()
288                    .x(category_data)
289                    .y(value_data)
290                    .orientation(orientation.to_plotly());
291
292                if let Some(all) = box_points {
293                    if all {
294                        trace = trace.box_points(BoxPoints::All);
295                    } else {
296                        trace = trace.box_points(BoxPoints::False);
297                    }
298                }
299
300                if let Some(point_position) = point_offset {
301                    trace = trace.point_pos(point_position);
302                }
303
304                if let Some(jitter) = jitter {
305                    trace = trace.jitter(jitter);
306                }
307
308                trace = trace.marker(marker);
309
310                if let Some(name) = group_name {
311                    trace = trace.name(name);
312                }
313
314                trace
315            }
316            Orientation::Horizontal => {
317                let mut trace = BoxPlotly::default()
318                    .x(value_data)
319                    .y(category_data)
320                    .orientation(orientation.to_plotly());
321
322                if let Some(all) = box_points {
323                    if all {
324                        trace = trace.box_points(BoxPoints::All);
325                    } else {
326                        trace = trace.box_points(BoxPoints::False);
327                    }
328                }
329
330                if let Some(point_position) = point_offset {
331                    trace = trace.point_pos(point_position);
332                }
333
334                if let Some(jitter) = jitter {
335                    trace = trace.jitter(jitter);
336                }
337
338                trace = trace.marker(marker);
339
340                if let Some(name) = group_name {
341                    trace = trace.name(name);
342                }
343
344                trace
345            }
346        }
347    }
348}
349
// Empty impls: `BoxPlot` overrides nothing and relies on whatever default
// methods these traits from `crate::common` provide. The helpers called as
// `Self::create_layout`, `Self::create_marker`, `Self::get_unique_groups`,
// `Self::filter_data_by_group`, `Self::get_string_column` and
// `Self::get_numeric_column` are not defined in this file, so they presumably
// come from these traits — confirm the exact mapping in `crate::common`.
impl Layout for BoxPlot {}
impl Marker for BoxPlot {}
impl Polar for BoxPlot {}
353
354impl PlotHelper for BoxPlot {
355    fn get_layout(&self) -> &LayoutPlotly {
356        &self.layout
357    }
358
359    fn get_traces(&self) -> &Vec<Box<dyn Trace + 'static>> {
360        &self.traces
361    }
362}