Skip to main content

plotlars_core/plots/
histogram.rs

1use bon::bon;
2
3use polars::frame::DataFrame;
4
5use crate::{
6    components::{Axis, FacetConfig, FacetScales, Legend, Rgb, Text, DEFAULT_PLOTLY_COLORS},
7    ir::data::ColumnData,
8    ir::layout::LayoutIR,
9    ir::marker::MarkerIR,
10    ir::trace::{BinsIR, HistogramIR, TraceIR},
11};
12
/// A structure representing a histogram.
///
/// The `Histogram` struct facilitates the creation and customization of histograms with various options
/// for data selection, layout configuration, and aesthetic adjustments. It supports grouping of data,
/// opacity settings, and customizable markers and colors.
///
/// Histograms are always laid out in overlay bar mode, so the traces of different groups are drawn on
/// top of each other; set `opacity` below `1.0` to keep overlapping groups readable.
///
/// # Backend Support
///
/// | Backend | Supported |
/// |---------|-----------|
/// | Plotly  | Yes       |
/// | Plotters| Yes       |
///
/// # Arguments
///
/// * `data` - A reference to the `DataFrame` containing the data to be plotted.
/// * `x` - A string slice specifying the column name to be used for the x-axis (independent variable).
/// * `group` - An optional string slice specifying the column name to be used for grouping data points.
/// * `sort_groups_by` - Optional comparator `fn(&str, &str) -> std::cmp::Ordering` to control group ordering. Groups are sorted lexically by default.
/// * `facet` - An optional string slice specifying the column name to be used for faceting (creating multiple subplots).
/// * `facet_config` - An optional reference to a `FacetConfig` struct for customizing facet behavior (grid dimensions, scales, gaps, etc.).
/// * `opacity` - An optional `f64` value specifying the opacity of the plot markers (range: 0.0 to 1.0).
/// * `color` - An optional `Rgb` value specifying the color of the markers to be used for the plot. This is used when `group` is not specified.
/// * `colors` - An optional vector of `Rgb` values specifying the colors to be used for the plot. This is used when `group` is specified to differentiate between groups.
/// * `plot_title` - An optional `Text` struct specifying the title of the plot.
/// * `x_title` - An optional `Text` struct specifying the title of the x-axis.
/// * `y_title` - An optional `Text` struct specifying the title of the y-axis.
/// * `legend_title` - An optional `Text` struct specifying the title of the legend.
/// * `x_axis` - An optional reference to an `Axis` struct for customizing the x-axis.
/// * `y_axis` - An optional reference to an `Axis` struct for customizing the y-axis.
/// * `legend` - An optional reference to a `Legend` struct for customizing the legend of the plot (e.g., positioning, font, etc.).
///
/// When both `color` and `colors` are supplied, `color` takes precedence and is applied to every trace.
///
/// # Panics
///
/// When `facet` is set, building the plot panics if:
///
/// * the facet column has more than 8 unique values;
/// * `colors` is given without `group` and its length differs from the number of facets;
/// * `colors` is given together with `group` and holds fewer colors than there are groups.
///
/// Use `Histogram::try_builder()` / `try_build()` to receive these failures as a
/// `PlotlarsError::PlotBuild` instead of a panic.
///
/// # Example
///
/// ```rust
/// use plotlars::{Axis, Histogram, Legend, Plot, Rgb, Text, TickDirection};
/// use polars::prelude::*;
///
/// let dataset = LazyCsvReader::new(PlRefPath::new("data/penguins.csv"))
///     .finish()
///     .unwrap()
///     .select([
///         col("species"),
///         col("sex").alias("gender"),
///         col("flipper_length_mm").cast(DataType::Int16),
///         col("body_mass_g").cast(DataType::Int16),
///     ])
///     .collect()
///     .unwrap();
///
/// let axis = Axis::new()
///     .show_line(true)
///     .show_grid(true)
///     .value_thousands(true)
///     .tick_direction(TickDirection::OutSide);
///
/// Histogram::builder()
///     .data(&dataset)
///     .x("body_mass_g")
///     .group("species")
///     .opacity(0.5)
///     .colors(vec![
///         Rgb(255, 165, 0),
///         Rgb(147, 112, 219),
///         Rgb(46, 139, 87),
///     ])
///     .plot_title(
///         Text::from("Histogram")
///             .font("Arial")
///             .size(18)
///     )
///     .x_title(
///         Text::from("body mass (g)")
///             .font("Arial")
///             .size(15)
///     )
///     .y_title(
///         Text::from("count")
///             .font("Arial")
///             .size(15)
///     )
///     .legend_title(
///         Text::from("species")
///             .font("Arial")
///             .size(15)
///     )
///     .x_axis(&axis)
///     .y_axis(&axis)
///     .legend(
///         &Legend::new()
///             .x(0.9)
///     )
///     .build()
///     .plot();
/// ```
///
/// ![Example](https://imgur.com/w2oiuIo.png)
#[derive(Clone)]
// NOTE(review): both fields are read by the `Plot` impl further down in this file, so this
// allow may no longer be needed — confirm before removing it.
#[allow(dead_code)]
pub struct Histogram {
    // Backend-agnostic traces: one per group, per facet, or per facet/group pair.
    traces: Vec<TraceIR>,
    // Backend-agnostic layout: titles, axes, legend and the optional facet grid.
    layout: LayoutIR,
}
116
117#[bon]
118impl Histogram {
119    #[builder(on(String, into), on(Text, into))]
120    pub fn new(
121        data: &DataFrame,
122        x: &str,
123        group: Option<&str>,
124        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
125        facet: Option<&str>,
126        facet_config: Option<&FacetConfig>,
127        opacity: Option<f64>,
128        color: Option<Rgb>,
129        colors: Option<Vec<Rgb>>,
130        plot_title: Option<Text>,
131        x_title: Option<Text>,
132        y_title: Option<Text>,
133        legend_title: Option<Text>,
134        x_axis: Option<&Axis>,
135        y_axis: Option<&Axis>,
136        legend: Option<&Legend>,
137    ) -> Self {
138        let grid = facet.map(|facet_column| {
139            let config = facet_config.cloned().unwrap_or_default();
140            let facet_categories =
141                crate::data::get_unique_groups(data, facet_column, config.sorter);
142            let n_facets = facet_categories.len();
143            let (ncols, nrows) =
144                crate::faceting::calculate_grid_dimensions(n_facets, config.cols, config.rows);
145            crate::ir::facet::GridSpec {
146                kind: crate::ir::facet::FacetKind::Axis,
147                rows: nrows,
148                cols: ncols,
149                h_gap: config.h_gap,
150                v_gap: config.v_gap,
151                scales: config.scales.clone(),
152                n_facets,
153                facet_categories,
154                title_style: config.title_style.clone(),
155                x_title: x_title.clone(),
156                y_title: y_title.clone(),
157                x_axis: x_axis.cloned(),
158                y_axis: y_axis.cloned(),
159                legend_title: legend_title.clone(),
160                legend: legend.cloned(),
161            }
162        });
163
164        let layout = LayoutIR {
165            title: plot_title.clone(),
166            x_title: if grid.is_some() {
167                None
168            } else {
169                x_title.clone()
170            },
171            y_title: if grid.is_some() {
172                None
173            } else {
174                y_title.clone()
175            },
176            y2_title: None,
177            z_title: None,
178            legend_title: if grid.is_some() {
179                None
180            } else {
181                legend_title.clone()
182            },
183            legend: if grid.is_some() {
184                None
185            } else {
186                legend.cloned()
187            },
188            dimensions: None,
189            bar_mode: Some(crate::components::BarMode::Overlay),
190            box_mode: None,
191            box_gap: None,
192            margin_bottom: None,
193            axes_2d: if grid.is_some() {
194                None
195            } else {
196                Some(crate::ir::layout::Axes2dIR {
197                    x_axis: x_axis.cloned(),
198                    y_axis: y_axis.cloned(),
199                    y2_axis: None,
200                })
201            },
202            scene_3d: None,
203            polar: None,
204            mapbox: None,
205            grid,
206            annotations: vec![],
207        };
208
209        let traces = match facet {
210            Some(facet_column) => {
211                let config = facet_config.cloned().unwrap_or_default();
212                Self::create_ir_traces_faceted(
213                    data,
214                    x,
215                    group,
216                    sort_groups_by,
217                    facet_column,
218                    &config,
219                    opacity,
220                    color,
221                    colors.clone(),
222                )
223            }
224            None => Self::create_ir_traces(data, x, group, sort_groups_by, opacity, color, colors),
225        };
226
227        Self { traces, layout }
228    }
229}
230
231#[bon]
232impl Histogram {
233    #[builder(
234        start_fn = try_builder,
235        finish_fn = try_build,
236        builder_type = HistogramTryBuilder,
237        on(String, into),
238        on(Text, into),
239    )]
240    pub fn try_new(
241        data: &DataFrame,
242        x: &str,
243        group: Option<&str>,
244        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
245        facet: Option<&str>,
246        facet_config: Option<&FacetConfig>,
247        opacity: Option<f64>,
248        color: Option<Rgb>,
249        colors: Option<Vec<Rgb>>,
250        plot_title: Option<Text>,
251        x_title: Option<Text>,
252        y_title: Option<Text>,
253        legend_title: Option<Text>,
254        x_axis: Option<&Axis>,
255        y_axis: Option<&Axis>,
256        legend: Option<&Legend>,
257    ) -> Result<Self, crate::io::PlotlarsError> {
258        std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
259            Self::__orig_new(
260                data,
261                x,
262                group,
263                sort_groups_by,
264                facet,
265                facet_config,
266                opacity,
267                color,
268                colors,
269                plot_title,
270                x_title,
271                y_title,
272                legend_title,
273                x_axis,
274                y_axis,
275                legend,
276            )
277        }))
278        .map_err(|panic| {
279            let msg = panic
280                .downcast_ref::<String>()
281                .cloned()
282                .or_else(|| panic.downcast_ref::<&str>().map(|s| s.to_string()))
283                .unwrap_or_else(|| "unknown error".to_string());
284            crate::io::PlotlarsError::PlotBuild { message: msg }
285        })
286    }
287}
288
289impl Histogram {
290    fn should_use_global_bins(scales: &FacetScales) -> bool {
291        match scales {
292            FacetScales::Fixed | FacetScales::FreeY => true,
293            FacetScales::Free | FacetScales::FreeX => false,
294        }
295    }
296    fn calculate_global_bins_ir(data: &DataFrame, x: &str) -> BinsIR {
297        let x_data = crate::data::get_numeric_column(data, x);
298
299        let values: Vec<f32> = x_data.iter().filter_map(|v| *v).collect();
300
301        if values.is_empty() {
302            return BinsIR {
303                start: 0.0,
304                end: 1.0,
305                size: 0.1,
306            };
307        }
308
309        let min = values.iter().cloned().fold(f32::INFINITY, f32::min);
310        let max = values.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
311
312        let n = values.len() as f32;
313        let nbins = (n.sqrt().ceil() as usize).clamp(10, 100);
314
315        let range = max - min;
316        let bin_size = if range > 0.0 {
317            range / nbins as f32
318        } else {
319            1.0
320        };
321
322        BinsIR {
323            start: min as f64,
324            end: max as f64,
325            size: bin_size as f64,
326        }
327    }
328
329    fn create_ir_traces(
330        data: &DataFrame,
331        x: &str,
332        group: Option<&str>,
333        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
334        opacity: Option<f64>,
335        color: Option<Rgb>,
336        colors: Option<Vec<Rgb>>,
337    ) -> Vec<TraceIR> {
338        let mut traces = Vec::new();
339
340        match group {
341            Some(group_col) => {
342                let groups = crate::data::get_unique_groups(data, group_col, sort_groups_by);
343
344                for (i, group_name) in groups.iter().enumerate() {
345                    let subset = crate::data::filter_data_by_group(data, group_col, group_name);
346
347                    let marker_ir = MarkerIR {
348                        opacity,
349                        size: None,
350                        color: Self::resolve_color(i, color, colors.clone()),
351                        shape: None,
352                    };
353
354                    traces.push(TraceIR::Histogram(HistogramIR {
355                        x: ColumnData::Numeric(crate::data::get_numeric_column(&subset, x)),
356                        name: Some(group_name.to_string()),
357                        marker: Some(marker_ir),
358                        bins: None,
359                        show_legend: None,
360                        legend_group: None,
361                        subplot_ref: None,
362                    }));
363                }
364            }
365            None => {
366                let marker_ir = MarkerIR {
367                    opacity,
368                    size: None,
369                    color: Self::resolve_color(0, color, colors),
370                    shape: None,
371                };
372
373                traces.push(TraceIR::Histogram(HistogramIR {
374                    x: ColumnData::Numeric(crate::data::get_numeric_column(data, x)),
375                    name: None,
376                    marker: Some(marker_ir),
377                    bins: None,
378                    show_legend: None,
379                    legend_group: None,
380                    subplot_ref: None,
381                }));
382            }
383        }
384
385        traces
386    }
387
388    #[allow(clippy::too_many_arguments)]
389    fn create_ir_traces_faceted(
390        data: &DataFrame,
391        x: &str,
392        group: Option<&str>,
393        sort_groups_by: Option<fn(&str, &str) -> std::cmp::Ordering>,
394        facet_column: &str,
395        config: &FacetConfig,
396        opacity: Option<f64>,
397        color: Option<Rgb>,
398        colors: Option<Vec<Rgb>>,
399    ) -> Vec<TraceIR> {
400        const MAX_FACETS: usize = 8;
401
402        let facet_categories = crate::data::get_unique_groups(data, facet_column, config.sorter);
403
404        if facet_categories.len() > MAX_FACETS {
405            panic!(
406                "Facet column '{}' has {} unique values, but plotly.rs supports maximum {} subplots",
407                facet_column,
408                facet_categories.len(),
409                MAX_FACETS
410            );
411        }
412
413        if let Some(ref color_vec) = colors {
414            if group.is_none() {
415                let color_count = color_vec.len();
416                let facet_count = facet_categories.len();
417                if color_count != facet_count {
418                    panic!(
419                        "When using colors with facet (without group), colors.len() must equal number of facets. \
420                         Expected {} colors for {} facets, but got {} colors. \
421                         Each facet must be assigned exactly one color.",
422                        facet_count, facet_count, color_count
423                    );
424                }
425            } else if let Some(group_col) = group {
426                let groups = crate::data::get_unique_groups(data, group_col, sort_groups_by);
427                let color_count = color_vec.len();
428                let group_count = groups.len();
429                if color_count < group_count {
430                    panic!(
431                        "When using colors with group, colors.len() must be >= number of groups. \
432                         Need at least {} colors for {} groups, but got {} colors",
433                        group_count, group_count, color_count
434                    );
435                }
436            }
437        }
438
439        let global_group_indices: std::collections::HashMap<String, usize> =
440            if let Some(group_col) = group {
441                let global_groups = crate::data::get_unique_groups(data, group_col, sort_groups_by);
442                global_groups
443                    .into_iter()
444                    .enumerate()
445                    .map(|(idx, group_name)| (group_name, idx))
446                    .collect()
447            } else {
448                std::collections::HashMap::new()
449            };
450
451        let colors = if group.is_some() && colors.is_none() {
452            Some(DEFAULT_PLOTLY_COLORS.to_vec())
453        } else {
454            colors
455        };
456
457        let global_bins = Self::should_use_global_bins(&config.scales);
458        let bins_ir = if global_bins {
459            Some(Self::calculate_global_bins_ir(data, x))
460        } else {
461            None
462        };
463
464        let mut traces = Vec::new();
465
466        for (facet_idx, facet_value) in facet_categories.iter().enumerate() {
467            let facet_data = crate::data::filter_data_by_group(data, facet_column, facet_value);
468
469            let subplot_ref = format!(
470                "{}{}",
471                crate::faceting::get_axis_reference(facet_idx, "x"),
472                crate::faceting::get_axis_reference(facet_idx, "y")
473            );
474
475            match group {
476                Some(group_col) => {
477                    let groups =
478                        crate::data::get_unique_groups(&facet_data, group_col, sort_groups_by);
479
480                    for group_val in groups.iter() {
481                        let group_data =
482                            crate::data::filter_data_by_group(&facet_data, group_col, group_val);
483
484                        let global_idx = global_group_indices.get(group_val).copied().unwrap_or(0);
485
486                        let marker_ir = MarkerIR {
487                            opacity,
488                            size: None,
489                            color: Self::resolve_color(global_idx, color, colors.clone()),
490                            shape: None,
491                        };
492
493                        traces.push(TraceIR::Histogram(HistogramIR {
494                            x: ColumnData::Numeric(crate::data::get_numeric_column(&group_data, x)),
495                            name: Some(group_val.to_string()),
496                            marker: Some(marker_ir),
497                            bins: bins_ir.clone(),
498                            show_legend: Some(facet_idx == 0),
499                            legend_group: Some(group_val.to_string()),
500                            subplot_ref: Some(subplot_ref.clone()),
501                        }));
502                    }
503                }
504                None => {
505                    let marker_ir = MarkerIR {
506                        opacity,
507                        size: None,
508                        color: Self::resolve_color(facet_idx, color, colors.clone()),
509                        shape: None,
510                    };
511
512                    traces.push(TraceIR::Histogram(HistogramIR {
513                        x: ColumnData::Numeric(crate::data::get_numeric_column(&facet_data, x)),
514                        name: None,
515                        marker: Some(marker_ir),
516                        bins: bins_ir.clone(),
517                        show_legend: Some(false),
518                        legend_group: None,
519                        subplot_ref: Some(subplot_ref.clone()),
520                    }));
521                }
522            }
523        }
524
525        traces
526    }
527
528    fn resolve_color(index: usize, color: Option<Rgb>, colors: Option<Vec<Rgb>>) -> Option<Rgb> {
529        if let Some(c) = color {
530            return Some(c);
531        }
532        if let Some(ref cs) = colors {
533            return cs.get(index).copied();
534        }
535        None
536    }
537}
538
539impl crate::Plot for Histogram {
540    fn ir_traces(&self) -> &[TraceIR] {
541        &self.traces
542    }
543
544    fn ir_layout(&self) -> &LayoutIR {
545        &self.layout
546    }
547}
548
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Plot;
    use polars::prelude::*;

    /// Asserts that `actual` holds a color with exactly the given channels.
    fn assert_rgb(actual: Option<Rgb>, r: u8, g: u8, b: u8) {
        let Rgb(red, green, blue) = actual.expect("expected Some(Rgb)");
        assert_eq!((red, green, blue), (r, g, b));
    }

    /// A plain histogram yields exactly one histogram trace.
    #[test]
    fn test_basic_one_trace() {
        let frame = df!["x" => [1.0, 2.0, 3.0, 4.0, 5.0]].unwrap();
        let histogram = Histogram::builder().data(&frame).x("x").build();

        let traces = histogram.ir_traces();
        assert_eq!(traces.len(), 1);
        assert!(matches!(traces[0], TraceIR::Histogram(_)));
    }

    /// Grouping produces one trace per distinct group value.
    #[test]
    fn test_with_group() {
        let frame = df![
            "x" => [1.0, 2.0, 3.0, 4.0],
            "g" => ["a", "b", "a", "b"]
        ]
        .unwrap();
        let histogram = Histogram::builder().data(&frame).x("x").group("g").build();

        assert_eq!(histogram.ir_traces().len(), 2);
    }

    /// The single `color` overrides the `colors` palette.
    #[test]
    fn test_resolve_color_singular_priority() {
        let single = Some(Rgb(255, 0, 0));
        let palette = Some(vec![Rgb(0, 0, 255)]);

        assert_rgb(Histogram::resolve_color(0, single, palette), 255, 0, 0);
    }

    /// Non-faceted plots carry their axes on the layout.
    #[test]
    fn test_layout_has_axes() {
        let frame = df!["x" => [1.0, 2.0, 3.0]].unwrap();
        let histogram = Histogram::builder().data(&frame).x("x").build();

        assert!(histogram.ir_layout().axes_2d.is_some());
    }

    /// Two facet values without grouping produce two traces.
    #[test]
    fn test_faceted_trace_count() {
        let frame = df![
            "x" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            "facet_col" => ["a", "a", "a", "b", "b", "b"]
        ]
        .unwrap();
        let histogram = Histogram::builder()
            .data(&frame)
            .x("x")
            .facet("facet_col")
            .build();

        assert_eq!(histogram.ir_traces().len(), 2);
    }
}