Skip to main content

ggplot_rs/
build.rs

1use crate::aes::{mapping::apply_after_stat, mapping::resolve_mappings, Aes, Aesthetic};
2use crate::annotate::Annotation;
3use crate::coord::Coord;
4use crate::data::DataFrame;
5use crate::facet::{Facet, FacetScales, Panel};
6use crate::geom::Geom;
7use crate::plot::{GGError, GGPlot, Labels, Layer};
8use crate::position::PositionParams;
9use crate::scale::ScaleSet;
10use crate::theme::Theme;
11
/// A built layer ready for rendering.
///
/// Produced by `PlotBuilder::build_layer` once a layer's data has gone through
/// the mapping, stat, position and scale-training steps.
pub struct BuiltLayer {
    /// The layer's data as it stands at the end of the build pipeline
    /// (after stat computation and position adjustment).
    pub data: DataFrame,
    /// The layer's geom, carried over from the source `Layer`. It has been
    /// given the theme's primary color when the layer maps neither color nor fill.
    pub geom: Box<dyn Geom>,
    /// Legend flag carried over unchanged from the source `Layer`.
    /// `Some(false)` counts toward suppressing an aesthetic's legend; `None`
    /// and `Some(true)` both keep it.
    pub show_legend: Option<bool>,
}
18
/// A fully built plot ready for rendering.
///
/// Returned by `PlotBuilder::build`. Apart from `layers`, `scales`, `panels`,
/// `panels_data`, `suppressed_aes` and `panel_scales`, the fields are moved
/// over unchanged from the input `GGPlot`.
pub struct BuiltPlot {
    /// One built layer per input layer, in the input order.
    pub layers: Vec<BuiltLayer>,
    /// The plot-wide scales: user-specified scales plus any created during the
    /// build, trained on every layer, with the coord's zoom limits applied.
    pub scales: ScaleSet,
    /// Coordinate system, as specified on the plot.
    pub coord: Box<dyn Coord>,
    /// Theme, as specified on the plot.
    pub theme: Theme,
    /// Labels, as specified on the plot.
    pub labels: Labels,
    /// Facet specification, as specified on the plot.
    pub facet: Facet,
    /// One entry per facet panel; empty when the plot is not faceted. Panel
    /// rectangles are zeroed here and filled in during rendering.
    pub panels: Vec<Panel>,
    /// Per-panel layer data. panels_data[panel_idx][layer_idx] = data for that panel+layer.
    pub panels_data: Vec<Vec<DataFrame>>,
    /// Annotations, as specified on the plot.
    pub annotations: Vec<Annotation>,
    /// Legend guide configuration, as specified on the plot.
    pub guide_legend: crate::guide::config::GuideLegend,
    /// Aesthetics suppressed from the legend (all layers with that aes set show_legend=false).
    pub suppressed_aes: std::collections::HashSet<Aesthetic>,
    /// Per-panel scale sets for free facets. Empty when FacetScales::Fixed.
    pub panel_scales: Vec<ScaleSet>,
}
37
/// The grammar pipeline: transforms a GGPlot specification into render-ready data.
///
/// A stateless namespace type: all functionality lives in associated
/// functions, with `PlotBuilder::build` as the entry point.
pub struct PlotBuilder;
40
41impl PlotBuilder {
42    pub fn build(plot: GGPlot) -> Result<BuiltPlot, GGError> {
43        let GGPlot {
44            data: plot_data,
45            mapping: plot_mapping,
46            layers,
47            scales: user_scales,
48            coord,
49            theme,
50            labels,
51            facet,
52            annotations,
53            guide_legend,
54        } = plot;
55
56        let mut scale_set = ScaleSet::new();
57
58        // Add user-specified scales
59        for s in user_scales {
60            scale_set.add(s);
61        }
62
63        let mut built_layers = Vec::new();
64
65        // Faceting variables — used to group stat computation per panel so a
66        // computed stat (density/histogram) is estimated per panel, not pooled.
67        let facet_vars = Self::facet_vars(&facet);
68
69        for layer in layers {
70            let built = Self::build_layer(
71                layer,
72                &plot_data,
73                &plot_mapping,
74                &mut scale_set,
75                theme.primary,
76                &facet_vars,
77            )?;
78            built_layers.push(built);
79        }
80
81        // Final scale training pass across all layers
82        for bl in &built_layers {
83            scale_set.train_layer(&bl.data);
84        }
85
86        // Apply coord zoom limits (coord_cartesian xlim/ylim)
87        if let Some((min, max)) = coord.zoom_x() {
88            scale_set.set_limits(&Aesthetic::X, min, max);
89        }
90        if let Some((min, max)) = coord.zoom_y() {
91            scale_set.set_limits(&Aesthetic::Y, min, max);
92        }
93
94        // Compute facet panels
95        let (panels, panels_data) = Self::compute_facets(&facet, &built_layers, &plot_data);
96
97        // Compute suppressed aesthetics from show_legend flags.
98        let suppressed_aes = Self::compute_suppressed_aes(&built_layers);
99
100        // Compute per-panel scales for free facets
101        let facet_scales_mode = match &facet {
102            Facet::Wrap { scales, .. } => scales.clone(),
103            Facet::Grid { scales, .. } => scales.clone(),
104            Facet::None => FacetScales::Fixed,
105        };
106        let panel_scales = Self::compute_panel_scales(&facet_scales_mode, &panels_data, &scale_set);
107
108        Ok(BuiltPlot {
109            layers: built_layers,
110            scales: scale_set,
111            coord,
112            theme,
113            labels,
114            facet,
115            panels,
116            panels_data,
117            annotations,
118            guide_legend,
119            suppressed_aes,
120            panel_scales,
121        })
122    }
123
124    /// The column name(s) a facet splits on, if any.
125    fn facet_vars(facet: &Facet) -> Vec<String> {
126        match facet {
127            Facet::None => vec![],
128            Facet::Wrap { var, .. } => vec![var.clone()],
129            Facet::Grid {
130                row_var, col_var, ..
131            } => row_var.iter().chain(col_var.iter()).cloned().collect(),
132        }
133    }
134
135    fn compute_facets(
136        facet: &Facet,
137        built_layers: &[BuiltLayer],
138        _plot_data: &DataFrame,
139    ) -> (Vec<Panel>, Vec<Vec<DataFrame>>) {
140        match facet {
141            Facet::None => (vec![], vec![]),
142            Facet::Wrap {
143                var,
144                ncol,
145                labeller,
146                ..
147            } => {
148                // Collect unique levels from all layers' data
149                let mut levels: Vec<String> = Vec::new();
150                for bl in built_layers {
151                    if let Some(col) = bl.data.column(var) {
152                        for v in col {
153                            let key = v.to_group_key();
154                            if !levels.contains(&key) {
155                                levels.push(key);
156                            }
157                        }
158                    }
159                }
160
161                // Panels will be positioned during rendering (depends on layout)
162                let panels: Vec<Panel> = levels
163                    .iter()
164                    .enumerate()
165                    .map(|(i, value)| {
166                        let ncols =
167                            ncol.unwrap_or_else(|| (levels.len() as f64).sqrt().ceil() as usize);
168                        let formatted = labeller.format(var, value);
169                        Panel {
170                            row: i / ncols.max(1),
171                            col: i % ncols.max(1),
172                            label: formatted.clone(),
173                            row_label: None,
174                            col_label: Some(formatted),
175                            rect: crate::render::Rect {
176                                x: 0.0,
177                                y: 0.0,
178                                width: 0.0,
179                                height: 0.0,
180                            },
181                        }
182                    })
183                    .collect();
184
185                // Split data per panel per layer
186                let panels_data: Vec<Vec<DataFrame>> = levels
187                    .iter()
188                    .map(|level| {
189                        built_layers
190                            .iter()
191                            .map(|bl| Self::filter_data_by_var(&bl.data, var, level))
192                            .collect()
193                    })
194                    .collect();
195
196                (panels, panels_data)
197            }
198            Facet::Grid {
199                row_var,
200                col_var,
201                labeller,
202                ..
203            } => {
204                let mut row_levels: Vec<String> = Vec::new();
205                let mut col_levels: Vec<String> = Vec::new();
206
207                for bl in built_layers {
208                    if let Some(rv) = row_var {
209                        if let Some(col) = bl.data.column(rv) {
210                            for v in col {
211                                let key = v.to_group_key();
212                                if !row_levels.contains(&key) {
213                                    row_levels.push(key);
214                                }
215                            }
216                        }
217                    }
218                    if let Some(cv) = col_var {
219                        if let Some(col) = bl.data.column(cv) {
220                            for v in col {
221                                let key = v.to_group_key();
222                                if !col_levels.contains(&key) {
223                                    col_levels.push(key);
224                                }
225                            }
226                        }
227                    }
228                }
229
230                if row_levels.is_empty() {
231                    row_levels.push("".to_string());
232                }
233                if col_levels.is_empty() {
234                    col_levels.push("".to_string());
235                }
236
237                let mut panels = Vec::new();
238                let mut panels_data = Vec::new();
239
240                for (ri, rl) in row_levels.iter().enumerate() {
241                    for (ci, cl) in col_levels.iter().enumerate() {
242                        let row_fmt = if rl.is_empty() {
243                            None
244                        } else {
245                            let rv = row_var.as_deref().unwrap_or("");
246                            Some(labeller.format(rv, rl))
247                        };
248                        let col_fmt = if cl.is_empty() {
249                            None
250                        } else {
251                            let cv = col_var.as_deref().unwrap_or("");
252                            Some(labeller.format(cv, cl))
253                        };
254                        let label = match (&row_fmt, &col_fmt) {
255                            (Some(r), Some(c)) => format!("{r} | {c}"),
256                            (Some(r), None) => r.clone(),
257                            (None, Some(c)) => c.clone(),
258                            (None, None) => String::new(),
259                        };
260                        panels.push(Panel {
261                            row: ri,
262                            col: ci,
263                            label,
264                            row_label: row_fmt,
265                            col_label: col_fmt,
266                            rect: crate::render::Rect {
267                                x: 0.0,
268                                y: 0.0,
269                                width: 0.0,
270                                height: 0.0,
271                            },
272                        });
273
274                        let layer_data: Vec<DataFrame> = built_layers
275                            .iter()
276                            .map(|bl| {
277                                let mut data = bl.data.clone();
278                                if let Some(rv) = row_var {
279                                    if !rl.is_empty() {
280                                        data = Self::filter_data_by_var(&data, rv, rl);
281                                    }
282                                }
283                                if let Some(cv) = col_var {
284                                    if !cl.is_empty() {
285                                        data = Self::filter_data_by_var(&data, cv, cl);
286                                    }
287                                }
288                                data
289                            })
290                            .collect();
291                        panels_data.push(layer_data);
292                    }
293                }
294
295                (panels, panels_data)
296            }
297        }
298    }
299
300    fn filter_data_by_var(data: &DataFrame, var: &str, level: &str) -> DataFrame {
301        if let Some(col) = data.column(var) {
302            let indices: Vec<usize> = col
303                .iter()
304                .enumerate()
305                .filter(|(_, v)| v.to_group_key() == level)
306                .map(|(i, _)| i)
307                .collect();
308
309            let mut result = DataFrame::new();
310            for col_name in data.column_names() {
311                if let Some(src) = data.column(col_name) {
312                    let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
313                    result.add_column(col_name.to_string(), vals);
314                }
315            }
316            result
317        } else {
318            data.clone()
319        }
320    }
321
    /// Runs a single layer through the grammar pipeline.
    ///
    /// In order: resolve the layer's data and mapping, optionally apply the
    /// theme's primary color to the geom, evaluate aesthetics, ensure and apply
    /// scale transformations, drop out-of-bounds rows, compute the stat per
    /// group, apply `after_stat()` mappings, validate the geom's required
    /// aesthetics, run the position adjustment, and train the scales on the
    /// result.
    ///
    /// * `layer` – the layer specification; consumed.
    /// * `plot_data` – plot-level data, cloned and used when the layer carries
    ///   no data of its own.
    /// * `plot_mapping` – plot-level mapping; the layer's mapping is merged
    ///   over it.
    /// * `scale_set` – shared across layers; scales are created and trained
    ///   here as a side effect.
    /// * `primary` – optional theme color triple, applied to the geom only
    ///   when the merged mapping has neither a color nor a fill mapping.
    /// * `facet_vars` – facet columns; added to the stat grouping and
    ///   re-attached to each group's stat output.
    ///
    /// # Errors
    /// Returns `GGError::ValidationError` when an aesthetic required by the
    /// geom is neither among the pre-stat columns nor in the stat output.
    fn build_layer(
        layer: Layer,
        plot_data: &DataFrame,
        plot_mapping: &Aes,
        scale_set: &mut ScaleSet,
        primary: Option<(u8, u8, u8)>,
        facet_vars: &[String],
    ) -> Result<BuiltLayer, GGError> {
        // NOTE(review): the layer's `params` field is discarded here; the
        // position adjustment in Step 7 runs with `PositionParams::default()`.
        let Layer {
            data: layer_data,
            mapping: layer_mapping,
            mut geom,
            stat,
            position,
            params: _,
            show_legend,
        } = layer;

        // Step 1: Resolve data — use layer data if provided, else plot data
        let source_data = layer_data.unwrap_or_else(|| plot_data.clone());

        // Step 2: Merge mappings — layer overrides plot-level
        let merged_mapping = plot_mapping.merge(&layer_mapping);

        // Brand/primary color: apply to a single-series geom only when the layer
        // maps neither color nor fill (an explicit aesthetic always wins).
        if let Some(color) = primary {
            let has_color = merged_mapping.get_mapping(&Aesthetic::Color).is_some();
            let has_fill = merged_mapping.get_mapping(&Aesthetic::Fill).is_some();
            if !has_color && !has_fill {
                geom.set_series_color(color);
            }
        }

        // Step 3: Evaluate aes — rename columns to canonical names
        let mut working_data = resolve_mappings(&source_data, &merged_mapping);

        // Remember which columns the user actually supplied (pre-stat). A required
        // aesthetic is satisfied if it was present here OR is synthesized by the
        // stat (checked after Step 6) — e.g. boxplot maps `y` then the stat turns
        // it into ymin/ymax, while StatEcdf produces `y` that wasn't mapped.
        let pre_stat_columns: Vec<String> = working_data
            .column_names()
            .iter()
            .map(|s| s.to_string())
            .collect();

        // Step 4: Ensure scales exist for each mapped aesthetic
        for m in &merged_mapping.mappings {
            scale_set.ensure_scale(&m.aesthetic, &working_data);
        }

        // Step 5: Scale transformation (e.g., log10 before stats)
        // Every scale in the set is visited, including those created by
        // earlier layers; a column is replaced only when the transform changed
        // at least one numeric value.
        for scale in scale_set.iter() {
            let col_name = scale.aesthetic().col_name().to_string();
            if let Some(col) = working_data.column(&col_name) {
                let transformed: Vec<_> = col.iter().map(|v| scale.transform(v)).collect();
                let any_changed = transformed.iter().zip(col.iter()).any(|(t, o)| {
                    match (t.as_f64(), o.as_f64()) {
                        (Some(a), Some(b)) => (a - b).abs() > f64::EPSILON,
                        _ => false,
                    }
                });
                if any_changed {
                    if let Some(col_mut) = working_data.column_mut(&col_name) {
                        *col_mut = transformed;
                    }
                }
            }
        }

        // Step 5b: Filter out-of-bounds data (xlim/ylim filter before stats)
        Self::filter_oob_data(&mut working_data, scale_set);

        // Step 6: Compute statistics. Group by aesthetic groups AND the facet
        // variables, so a computed stat (density/histogram/…) is estimated per
        // panel rather than on pooled data; the facet column is then re-attached
        // to each group's output so faceting can split it back out.
        let mut group_cols = Self::detect_group_columns(&working_data);
        for fv in facet_vars {
            if working_data.has_column(fv) && !group_cols.contains(fv) {
                group_cols.push(fv.clone());
            }
        }

        working_data = if !group_cols.is_empty() {
            let groups =
                working_data.group_by(&group_cols.iter().map(|s| s.as_str()).collect::<Vec<_>>());
            let mut result = DataFrame::new();
            for group in groups {
                let mut computed = stat.compute_group(&group, scale_set);
                let n = computed.nrows();
                if n > 0 {
                    for fv in facet_vars {
                        if !computed.has_column(fv) {
                            // The facet value is taken from the group's first
                            // row and repeated for every output row.
                            if let Some(val) = group.column(fv).and_then(|c| c.first()).cloned() {
                                computed.add_column(fv.clone(), vec![val; n]);
                            }
                        }
                    }
                }
                result.vstack(&computed);
            }
            result
        } else {
            stat.compute_group(&working_data, scale_set)
        };

        // Step 6a: Apply after_stat() mappings (rename stat-computed columns)
        apply_after_stat(&mut working_data, &merged_mapping);

        // Step 6a-validate: A required aesthetic must have been supplied by the
        // user (pre-stat) or synthesized by the stat (post-stat). This lets
        // StatEcdf produce `y` for geom_step, while boxplot — which maps `y` then
        // consumes it into ymin/ymax — still validates. Empty input has the
        // column in neither place, so genuinely-missing aesthetics still error.
        for aes in &geom.required_aes() {
            let col_name = aes.col_name();
            let supplied = pre_stat_columns.iter().any(|c| c == col_name);
            if !supplied && !working_data.has_column(col_name) {
                return Err(GGError::ValidationError(format!(
                    "geom_{} requires aesthetic '{}' but it was not provided",
                    geom.name(),
                    col_name
                )));
            }
        }

        // Step 6b: Ensure scales for stat-computed aesthetics (e.g. y from StatCount/StatBin)
        // Each positional column is paired with the scale it belongs to.
        let stat_aes = [
            ("x", Aesthetic::X),
            ("y", Aesthetic::Y),
            ("xmin", Aesthetic::X),
            ("xmax", Aesthetic::X),
            ("ymin", Aesthetic::Y),
            ("ymax", Aesthetic::Y),
        ];
        for (col, aes) in &stat_aes {
            if working_data.has_column(col) {
                scale_set.ensure_scale(aes, &working_data);
            }
        }

        // Step 6c: For stat-computed Y (bars, histograms), ensure Y scale includes zero baseline
        let y_is_user_mapped = merged_mapping.get_mapping(&Aesthetic::Y).is_some();
        if !y_is_user_mapped && working_data.has_column("y") {
            if let Some(y_scale) = scale_set.get_mut(&Aesthetic::Y) {
                y_scale.train(&[crate::data::Value::Float(0.0)]);
            }
        }

        // Step 7: Position adjustment
        let params = PositionParams::default();
        position.compute(&mut working_data, &params);

        // Step 8: Train scales on this layer's data
        scale_set.train_layer(&working_data);

        // Step 8b: Positional scales also need to see stat-computed extent columns
        // (e.g. boxplot/errorbar/pointrange emit ymin/ymax but no "y"). Without
        // this the Y (or X) scale would never train on the range and collapse.
        for (col, aes) in &stat_aes {
            if let Some(values) = working_data.column(col) {
                if let Some(scale) = scale_set.get_mut(aes) {
                    scale.train(values);
                }
            }
        }

        Ok(BuiltLayer {
            data: working_data,
            geom,
            show_legend,
        })
    }
497
498    /// Remove rows where x or y falls outside scale limits set via xlim/ylim.
499    fn filter_oob_data(data: &mut DataFrame, scale_set: &ScaleSet) {
500        let x_limits = scale_set.get(&Aesthetic::X).and_then(|s| s.filter_limits());
501        let y_limits = scale_set.get(&Aesthetic::Y).and_then(|s| s.filter_limits());
502
503        if x_limits.is_none() && y_limits.is_none() {
504            return;
505        }
506
507        let nrows = data.nrows();
508        let mut keep = vec![true; nrows];
509
510        if let Some((min, max)) = x_limits {
511            if let Some(col) = data.column("x") {
512                for (i, v) in col.iter().enumerate() {
513                    if let Some(f) = v.as_f64() {
514                        if f < min || f > max {
515                            keep[i] = false;
516                        }
517                    }
518                }
519            }
520        }
521
522        if let Some((min, max)) = y_limits {
523            if let Some(col) = data.column("y") {
524                for (i, v) in col.iter().enumerate() {
525                    if let Some(f) = v.as_f64() {
526                        if f < min || f > max {
527                            keep[i] = false;
528                        }
529                    }
530                }
531            }
532        }
533
534        // If nothing was filtered, skip the rebuild
535        if keep.iter().all(|&k| k) {
536            return;
537        }
538
539        let indices: Vec<usize> = keep
540            .iter()
541            .enumerate()
542            .filter(|(_, &k)| k)
543            .map(|(i, _)| i)
544            .collect();
545
546        let mut result = DataFrame::new();
547        for col_name in data.column_names() {
548            if let Some(src) = data.column(col_name) {
549                let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
550                result.add_column(col_name.to_string(), vals);
551            }
552        }
553        *data = result;
554    }
555
556    /// Compute per-panel scale sets for free facet scales.
557    /// For each panel, clones the base scale set, resets freed axes, and retrains on panel data.
558    fn compute_panel_scales(
559        facet_scales: &FacetScales,
560        panels_data: &[Vec<DataFrame>],
561        base_scales: &ScaleSet,
562    ) -> Vec<ScaleSet> {
563        if matches!(facet_scales, FacetScales::Fixed) || panels_data.is_empty() {
564            return vec![];
565        }
566
567        let free_x = matches!(facet_scales, FacetScales::FreeX | FacetScales::Free);
568        let free_y = matches!(facet_scales, FacetScales::FreeY | FacetScales::Free);
569
570        panels_data
571            .iter()
572            .map(|panel_layers| {
573                let mut panel_set = base_scales.clone();
574
575                // Reset freed axis scales
576                if free_x {
577                    if let Some(s) = panel_set.get_mut(&Aesthetic::X) {
578                        s.reset_training();
579                    }
580                }
581                if free_y {
582                    if let Some(s) = panel_set.get_mut(&Aesthetic::Y) {
583                        s.reset_training();
584                    }
585                }
586
587                // Retrain on this panel's data
588                for layer_data in panel_layers {
589                    panel_set.train_layer(layer_data);
590                }
591
592                panel_set
593            })
594            .collect()
595    }
596
597    /// Compute which aesthetics should be suppressed from the legend.
598    /// An aesthetic is suppressed if every layer that has the corresponding column
599    /// sets show_legend=Some(false), and no layer has it as None or Some(true).
600    fn compute_suppressed_aes(built_layers: &[BuiltLayer]) -> std::collections::HashSet<Aesthetic> {
601        use std::collections::HashSet;
602        let legend_aes = [
603            Aesthetic::Color,
604            Aesthetic::Fill,
605            Aesthetic::Shape,
606            Aesthetic::Linetype,
607            Aesthetic::Size,
608            Aesthetic::Alpha,
609        ];
610        let mut suppressed = HashSet::new();
611        for aes in &legend_aes {
612            let col_name = aes.col_name();
613            let mut any_has = false;
614            let mut all_hidden = true;
615            for bl in built_layers {
616                if bl.data.has_column(col_name) {
617                    any_has = true;
618                    match bl.show_legend {
619                        Some(false) => {} // still hidden
620                        _ => {
621                            all_hidden = false;
622                            break;
623                        }
624                    }
625                }
626            }
627            if any_has && all_hidden {
628                suppressed.insert(aes.clone());
629            }
630        }
631        suppressed
632    }
633
634    /// Detect which columns to group by for statistics.
635    /// Checks group/color/fill plus discrete x (like R's auto-grouping by discrete x).
636    fn detect_group_columns(data: &DataFrame) -> Vec<String> {
637        let candidates = ["group", "color", "fill", "x"];
638        let mut group_cols = Vec::new();
639        for &col in &candidates {
640            if data.has_column(col) {
641                if let Some(values) = data.column(col) {
642                    let is_discrete = values
643                        .iter()
644                        .any(|v| matches!(v, crate::data::Value::Str(_)));
645                    if is_discrete {
646                        group_cols.push(col.to_string());
647                    }
648                }
649            }
650        }
651        group_cols
652    }
653}