Skip to main content

ggplot_rs/
build.rs

1use crate::aes::{mapping::apply_after_stat, mapping::resolve_mappings, Aes, Aesthetic};
2use crate::annotate::Annotation;
3use crate::coord::Coord;
4use crate::data::DataFrame;
5use crate::facet::{Facet, FacetScales, Panel};
6use crate::geom::Geom;
7use crate::plot::{GGError, GGPlot, Labels, Layer};
8use crate::position::PositionParams;
9use crate::scale::ScaleSet;
10use crate::theme::Theme;
11
/// A built layer ready for rendering.
///
/// Produced by `PlotBuilder::build_layer` once a layer specification has been
/// run through the build pipeline.
pub struct BuiltLayer {
    /// The layer's data as it stands at the end of the build pipeline
    /// (mappings resolved, statistic computed, position adjustment applied).
    pub data: DataFrame,
    /// The geom carried over from the layer specification.
    pub geom: Box<dyn Geom>,
    /// Legend flag carried over from the layer. `Some(false)` counts as
    /// "hidden" when the builder works out which aesthetics to suppress from
    /// the legend; `None` and `Some(true)` both count as "shown".
    pub show_legend: Option<bool>,
}
18
/// A fully built plot ready for rendering.
///
/// This is the output of `PlotBuilder::build`.
pub struct BuiltPlot {
    /// Built layers, in the same order as the layers of the plot specification.
    pub layers: Vec<BuiltLayer>,
    /// Scale set seeded with the user-specified scales and trained on every
    /// built layer; coord zoom limits (if any) have been applied to X/Y.
    pub scales: ScaleSet,
    /// Coordinate system, after it has been handed the trained X/Y axis spans.
    pub coord: Box<dyn Coord>,
    /// Theme, passed through from the plot specification.
    pub theme: Theme,
    /// Labels, passed through from the plot specification.
    pub labels: Labels,
    /// Facet specification, passed through from the plot specification.
    pub facet: Facet,
    /// One entry per facet panel; empty when the plot is not faceted.
    /// Panel rectangles are zero-sized placeholders at this stage.
    pub panels: Vec<Panel>,
    /// Per-panel layer data. panels_data[panel_idx][layer_idx] = data for that panel+layer.
    pub panels_data: Vec<Vec<DataFrame>>,
    /// Annotations, passed through from the plot specification.
    pub annotations: Vec<Annotation>,
    /// Legend guide configuration, passed through from the plot specification.
    pub guide_legend: crate::guide::config::GuideLegend,
    /// Aesthetics suppressed from the legend (all layers with that aes set show_legend=false).
    pub suppressed_aes: std::collections::HashSet<Aesthetic>,
    /// Per-panel scale sets for free facets. Empty when FacetScales::Fixed.
    pub panel_scales: Vec<ScaleSet>,
}
37
/// The grammar pipeline: transforms a GGPlot specification into render-ready data.
///
/// This is a stateless unit struct; the pipeline is exposed through the
/// associated function `PlotBuilder::build`.
pub struct PlotBuilder;
40
41impl PlotBuilder {
42    pub fn build(plot: GGPlot) -> Result<BuiltPlot, GGError> {
43        let GGPlot {
44            data: plot_data,
45            mapping: plot_mapping,
46            layers,
47            scales: user_scales,
48            mut coord,
49            theme,
50            labels,
51            facet,
52            annotations,
53            guide_legend,
54        } = plot;
55
56        let mut scale_set = ScaleSet::new();
57
58        // Add user-specified scales
59        for s in user_scales {
60            scale_set.add(s);
61        }
62
63        let mut built_layers = Vec::new();
64
65        // Faceting variables — used to group stat computation per panel so a
66        // computed stat (density/histogram) is estimated per panel, not pooled.
67        let facet_vars = Self::facet_vars(&facet);
68
69        for layer in layers {
70            let built = Self::build_layer(
71                layer,
72                &plot_data,
73                &plot_mapping,
74                &mut scale_set,
75                theme.primary,
76                &facet_vars,
77            )?;
78            built_layers.push(built);
79        }
80
81        // Final scale training pass across all layers
82        for bl in &built_layers {
83            scale_set.train_layer(&bl.data);
84        }
85
86        // Apply coord zoom limits (coord_cartesian xlim/ylim)
87        if let Some((min, max)) = coord.zoom_x() {
88            scale_set.set_limits(&Aesthetic::X, min, max);
89        }
90        if let Some((min, max)) = coord.zoom_y() {
91            scale_set.set_limits(&Aesthetic::Y, min, max);
92        }
93
94        // Supply trained axis spans to the coordinate system (used by coord_trans).
95        // pmin/pmax are the panel positions of the domain endpoints, so the coord
96        // can invert the scale's (linearly expanded) mapping exactly.
97        let axis_span = |aes: &Aesthetic| {
98            scale_set.get(aes).and_then(|s| {
99                s.domain().map(|(min, max)| crate::coord::AxisSpan {
100                    min,
101                    max,
102                    pmin: s.map(&crate::data::Value::Float(min)),
103                    pmax: s.map(&crate::data::Value::Float(max)),
104                })
105            })
106        };
107        let x_span = axis_span(&Aesthetic::X);
108        let y_span = axis_span(&Aesthetic::Y);
109        coord.set_domains(x_span, y_span);
110
111        // Compute facet panels
112        let (panels, panels_data) = Self::compute_facets(&facet, &built_layers, &plot_data);
113
114        // Compute suppressed aesthetics from show_legend flags.
115        let suppressed_aes = Self::compute_suppressed_aes(&built_layers);
116
117        // Compute per-panel scales for free facets
118        let facet_scales_mode = match &facet {
119            Facet::Wrap { scales, .. } => scales.clone(),
120            Facet::Grid { scales, .. } => scales.clone(),
121            Facet::None => FacetScales::Fixed,
122        };
123        let panel_scales = Self::compute_panel_scales(&facet_scales_mode, &panels_data, &scale_set);
124
125        Ok(BuiltPlot {
126            layers: built_layers,
127            scales: scale_set,
128            coord,
129            theme,
130            labels,
131            facet,
132            panels,
133            panels_data,
134            annotations,
135            guide_legend,
136            suppressed_aes,
137            panel_scales,
138        })
139    }
140
141    /// The column name(s) a facet splits on, if any.
142    fn facet_vars(facet: &Facet) -> Vec<String> {
143        match facet {
144            Facet::None => vec![],
145            Facet::Wrap { var, .. } => vec![var.clone()],
146            Facet::Grid {
147                row_var, col_var, ..
148            } => row_var.iter().chain(col_var.iter()).cloned().collect(),
149        }
150    }
151
152    fn compute_facets(
153        facet: &Facet,
154        built_layers: &[BuiltLayer],
155        _plot_data: &DataFrame,
156    ) -> (Vec<Panel>, Vec<Vec<DataFrame>>) {
157        match facet {
158            Facet::None => (vec![], vec![]),
159            Facet::Wrap {
160                var,
161                ncol,
162                labeller,
163                ..
164            } => {
165                // Collect unique levels from all layers' data
166                let mut levels: Vec<String> = Vec::new();
167                for bl in built_layers {
168                    if let Some(col) = bl.data.column(var) {
169                        for v in col {
170                            let key = v.to_group_key();
171                            if !levels.contains(&key) {
172                                levels.push(key);
173                            }
174                        }
175                    }
176                }
177
178                // Panels will be positioned during rendering (depends on layout)
179                let panels: Vec<Panel> = levels
180                    .iter()
181                    .enumerate()
182                    .map(|(i, value)| {
183                        let ncols =
184                            ncol.unwrap_or_else(|| (levels.len() as f64).sqrt().ceil() as usize);
185                        let formatted = labeller.format(var, value);
186                        Panel {
187                            row: i / ncols.max(1),
188                            col: i % ncols.max(1),
189                            label: formatted.clone(),
190                            row_label: None,
191                            col_label: Some(formatted),
192                            rect: crate::render::Rect {
193                                x: 0.0,
194                                y: 0.0,
195                                width: 0.0,
196                                height: 0.0,
197                            },
198                        }
199                    })
200                    .collect();
201
202                // Split data per panel per layer
203                let panels_data: Vec<Vec<DataFrame>> = levels
204                    .iter()
205                    .map(|level| {
206                        built_layers
207                            .iter()
208                            .map(|bl| Self::filter_data_by_var(&bl.data, var, level))
209                            .collect()
210                    })
211                    .collect();
212
213                (panels, panels_data)
214            }
215            Facet::Grid {
216                row_var,
217                col_var,
218                labeller,
219                ..
220            } => {
221                let mut row_levels: Vec<String> = Vec::new();
222                let mut col_levels: Vec<String> = Vec::new();
223
224                for bl in built_layers {
225                    if let Some(rv) = row_var {
226                        if let Some(col) = bl.data.column(rv) {
227                            for v in col {
228                                let key = v.to_group_key();
229                                if !row_levels.contains(&key) {
230                                    row_levels.push(key);
231                                }
232                            }
233                        }
234                    }
235                    if let Some(cv) = col_var {
236                        if let Some(col) = bl.data.column(cv) {
237                            for v in col {
238                                let key = v.to_group_key();
239                                if !col_levels.contains(&key) {
240                                    col_levels.push(key);
241                                }
242                            }
243                        }
244                    }
245                }
246
247                if row_levels.is_empty() {
248                    row_levels.push("".to_string());
249                }
250                if col_levels.is_empty() {
251                    col_levels.push("".to_string());
252                }
253
254                let mut panels = Vec::new();
255                let mut panels_data = Vec::new();
256
257                for (ri, rl) in row_levels.iter().enumerate() {
258                    for (ci, cl) in col_levels.iter().enumerate() {
259                        let row_fmt = if rl.is_empty() {
260                            None
261                        } else {
262                            let rv = row_var.as_deref().unwrap_or("");
263                            Some(labeller.format(rv, rl))
264                        };
265                        let col_fmt = if cl.is_empty() {
266                            None
267                        } else {
268                            let cv = col_var.as_deref().unwrap_or("");
269                            Some(labeller.format(cv, cl))
270                        };
271                        let label = match (&row_fmt, &col_fmt) {
272                            (Some(r), Some(c)) => format!("{r} | {c}"),
273                            (Some(r), None) => r.clone(),
274                            (None, Some(c)) => c.clone(),
275                            (None, None) => String::new(),
276                        };
277                        panels.push(Panel {
278                            row: ri,
279                            col: ci,
280                            label,
281                            row_label: row_fmt,
282                            col_label: col_fmt,
283                            rect: crate::render::Rect {
284                                x: 0.0,
285                                y: 0.0,
286                                width: 0.0,
287                                height: 0.0,
288                            },
289                        });
290
291                        let layer_data: Vec<DataFrame> = built_layers
292                            .iter()
293                            .map(|bl| {
294                                let mut data = bl.data.clone();
295                                if let Some(rv) = row_var {
296                                    if !rl.is_empty() {
297                                        data = Self::filter_data_by_var(&data, rv, rl);
298                                    }
299                                }
300                                if let Some(cv) = col_var {
301                                    if !cl.is_empty() {
302                                        data = Self::filter_data_by_var(&data, cv, cl);
303                                    }
304                                }
305                                data
306                            })
307                            .collect();
308                        panels_data.push(layer_data);
309                    }
310                }
311
312                (panels, panels_data)
313            }
314        }
315    }
316
317    fn filter_data_by_var(data: &DataFrame, var: &str, level: &str) -> DataFrame {
318        if let Some(col) = data.column(var) {
319            let indices: Vec<usize> = col
320                .iter()
321                .enumerate()
322                .filter(|(_, v)| v.to_group_key() == level)
323                .map(|(i, _)| i)
324                .collect();
325
326            let mut result = DataFrame::new();
327            for col_name in data.column_names() {
328                if let Some(src) = data.column(col_name) {
329                    let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
330                    result.add_column(col_name.to_string(), vals);
331                }
332            }
333            result
334        } else {
335            data.clone()
336        }
337    }
338
339    fn build_layer(
340        layer: Layer,
341        plot_data: &DataFrame,
342        plot_mapping: &Aes,
343        scale_set: &mut ScaleSet,
344        primary: Option<(u8, u8, u8)>,
345        facet_vars: &[String],
346    ) -> Result<BuiltLayer, GGError> {
347        let Layer {
348            data: layer_data,
349            mapping: layer_mapping,
350            mut geom,
351            stat,
352            position,
353            params: _,
354            show_legend,
355        } = layer;
356
357        // Step 1: Resolve data — use layer data if provided, else plot data
358        let source_data = layer_data.unwrap_or_else(|| plot_data.clone());
359
360        // Step 2: Merge mappings — layer overrides plot-level
361        let merged_mapping = plot_mapping.merge(&layer_mapping);
362
363        // Brand/primary color: apply to a single-series geom only when the layer
364        // maps neither color nor fill (an explicit aesthetic always wins).
365        if let Some(color) = primary {
366            let has_color = merged_mapping.get_mapping(&Aesthetic::Color).is_some();
367            let has_fill = merged_mapping.get_mapping(&Aesthetic::Fill).is_some();
368            if !has_color && !has_fill {
369                geom.set_series_color(color);
370            }
371        }
372
373        // Step 3: Evaluate aes — rename columns to canonical names
374        let mut working_data = resolve_mappings(&source_data, &merged_mapping);
375
376        // Remember which columns the user actually supplied (pre-stat). A required
377        // aesthetic is satisfied if it was present here OR is synthesized by the
378        // stat (checked after Step 6) — e.g. boxplot maps `y` then the stat turns
379        // it into ymin/ymax, while StatEcdf produces `y` that wasn't mapped.
380        let pre_stat_columns: Vec<String> = working_data
381            .column_names()
382            .iter()
383            .map(|s| s.to_string())
384            .collect();
385
386        // Step 4: Ensure scales exist for each mapped aesthetic
387        for m in &merged_mapping.mappings {
388            scale_set.ensure_scale(&m.aesthetic, &working_data);
389        }
390
391        // Step 5: Scale transformation (e.g., log10 before stats)
392        for scale in scale_set.iter() {
393            let col_name = scale.aesthetic().col_name().to_string();
394            if let Some(col) = working_data.column(&col_name) {
395                let transformed: Vec<_> = col.iter().map(|v| scale.transform(v)).collect();
396                let any_changed = transformed.iter().zip(col.iter()).any(|(t, o)| {
397                    match (t.as_f64(), o.as_f64()) {
398                        (Some(a), Some(b)) => (a - b).abs() > f64::EPSILON,
399                        _ => false,
400                    }
401                });
402                if any_changed {
403                    if let Some(col_mut) = working_data.column_mut(&col_name) {
404                        *col_mut = transformed;
405                    }
406                }
407            }
408        }
409
410        // Step 5b: Filter out-of-bounds data (xlim/ylim filter before stats)
411        Self::filter_oob_data(&mut working_data, scale_set);
412
413        // Step 6: Compute statistics. Group by aesthetic groups AND the facet
414        // variables, so a computed stat (density/histogram/…) is estimated per
415        // panel rather than on pooled data; the facet column is then re-attached
416        // to each group's output so faceting can split it back out.
417        let mut group_cols = Self::detect_group_columns(&working_data);
418        for fv in facet_vars {
419            if working_data.has_column(fv) && !group_cols.contains(fv) {
420                group_cols.push(fv.clone());
421            }
422        }
423
424        working_data = if !group_cols.is_empty() {
425            let groups =
426                working_data.group_by(&group_cols.iter().map(|s| s.as_str()).collect::<Vec<_>>());
427            let mut result = DataFrame::new();
428            for group in groups {
429                let mut computed = stat.compute_group(&group, scale_set);
430                let n = computed.nrows();
431                if n > 0 {
432                    for fv in facet_vars {
433                        if !computed.has_column(fv) {
434                            if let Some(val) = group.column(fv).and_then(|c| c.first()).cloned() {
435                                computed.add_column(fv.clone(), vec![val; n]);
436                            }
437                        }
438                    }
439                }
440                result.vstack(&computed);
441            }
442            result
443        } else {
444            stat.compute_group(&working_data, scale_set)
445        };
446
447        // Step 6a: Apply after_stat() mappings (rename stat-computed columns)
448        apply_after_stat(&mut working_data, &merged_mapping);
449
450        // Step 6a-validate: A required aesthetic must have been supplied by the
451        // user (pre-stat) or synthesized by the stat (post-stat). This lets
452        // StatEcdf produce `y` for geom_step, while boxplot — which maps `y` then
453        // consumes it into ymin/ymax — still validates. Empty input has the
454        // column in neither place, so genuinely-missing aesthetics still error.
455        for aes in &geom.required_aes() {
456            let col_name = aes.col_name();
457            let supplied = pre_stat_columns.iter().any(|c| c == col_name);
458            if !supplied && !working_data.has_column(col_name) {
459                return Err(GGError::ValidationError(format!(
460                    "geom_{} requires aesthetic '{}' but it was not provided",
461                    geom.name(),
462                    col_name
463                )));
464            }
465        }
466
467        // Step 6b: Ensure scales for stat-computed aesthetics (e.g. y from StatCount/StatBin)
468        let stat_aes = [
469            ("x", Aesthetic::X),
470            ("y", Aesthetic::Y),
471            ("xmin", Aesthetic::X),
472            ("xmax", Aesthetic::X),
473            ("ymin", Aesthetic::Y),
474            ("ymax", Aesthetic::Y),
475        ];
476        for (col, aes) in &stat_aes {
477            if working_data.has_column(col) {
478                scale_set.ensure_scale(aes, &working_data);
479            }
480        }
481
482        // Step 6c: For stat-computed Y (bars, histograms), ensure Y scale includes zero baseline
483        let y_is_user_mapped = merged_mapping.get_mapping(&Aesthetic::Y).is_some();
484        if !y_is_user_mapped && working_data.has_column("y") {
485            if let Some(y_scale) = scale_set.get_mut(&Aesthetic::Y) {
486                y_scale.train(&[crate::data::Value::Float(0.0)]);
487            }
488        }
489
490        // Step 7: Position adjustment
491        let params = PositionParams::default();
492        position.compute(&mut working_data, &params);
493
494        // Step 8: Train scales on this layer's data
495        scale_set.train_layer(&working_data);
496
497        // Step 8b: Positional scales also need to see stat-computed extent columns
498        // (e.g. boxplot/errorbar/pointrange emit ymin/ymax but no "y"). Without
499        // this the Y (or X) scale would never train on the range and collapse.
500        for (col, aes) in &stat_aes {
501            if let Some(values) = working_data.column(col) {
502                if let Some(scale) = scale_set.get_mut(aes) {
503                    scale.train(values);
504                }
505            }
506        }
507
508        Ok(BuiltLayer {
509            data: working_data,
510            geom,
511            show_legend,
512        })
513    }
514
515    /// Remove rows where x or y falls outside scale limits set via xlim/ylim.
516    fn filter_oob_data(data: &mut DataFrame, scale_set: &ScaleSet) {
517        let x_limits = scale_set.get(&Aesthetic::X).and_then(|s| s.filter_limits());
518        let y_limits = scale_set.get(&Aesthetic::Y).and_then(|s| s.filter_limits());
519
520        if x_limits.is_none() && y_limits.is_none() {
521            return;
522        }
523
524        let nrows = data.nrows();
525        let mut keep = vec![true; nrows];
526
527        if let Some((min, max)) = x_limits {
528            if let Some(col) = data.column("x") {
529                for (i, v) in col.iter().enumerate() {
530                    if let Some(f) = v.as_f64() {
531                        if f < min || f > max {
532                            keep[i] = false;
533                        }
534                    }
535                }
536            }
537        }
538
539        if let Some((min, max)) = y_limits {
540            if let Some(col) = data.column("y") {
541                for (i, v) in col.iter().enumerate() {
542                    if let Some(f) = v.as_f64() {
543                        if f < min || f > max {
544                            keep[i] = false;
545                        }
546                    }
547                }
548            }
549        }
550
551        // If nothing was filtered, skip the rebuild
552        if keep.iter().all(|&k| k) {
553            return;
554        }
555
556        let indices: Vec<usize> = keep
557            .iter()
558            .enumerate()
559            .filter(|(_, &k)| k)
560            .map(|(i, _)| i)
561            .collect();
562
563        let mut result = DataFrame::new();
564        for col_name in data.column_names() {
565            if let Some(src) = data.column(col_name) {
566                let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
567                result.add_column(col_name.to_string(), vals);
568            }
569        }
570        *data = result;
571    }
572
573    /// Compute per-panel scale sets for free facet scales.
574    /// For each panel, clones the base scale set, resets freed axes, and retrains on panel data.
575    fn compute_panel_scales(
576        facet_scales: &FacetScales,
577        panels_data: &[Vec<DataFrame>],
578        base_scales: &ScaleSet,
579    ) -> Vec<ScaleSet> {
580        if matches!(facet_scales, FacetScales::Fixed) || panels_data.is_empty() {
581            return vec![];
582        }
583
584        let free_x = matches!(facet_scales, FacetScales::FreeX | FacetScales::Free);
585        let free_y = matches!(facet_scales, FacetScales::FreeY | FacetScales::Free);
586
587        panels_data
588            .iter()
589            .map(|panel_layers| {
590                let mut panel_set = base_scales.clone();
591
592                // Reset freed axis scales
593                if free_x {
594                    if let Some(s) = panel_set.get_mut(&Aesthetic::X) {
595                        s.reset_training();
596                    }
597                }
598                if free_y {
599                    if let Some(s) = panel_set.get_mut(&Aesthetic::Y) {
600                        s.reset_training();
601                    }
602                }
603
604                // Retrain on this panel's data
605                for layer_data in panel_layers {
606                    panel_set.train_layer(layer_data);
607                }
608
609                panel_set
610            })
611            .collect()
612    }
613
614    /// Compute which aesthetics should be suppressed from the legend.
615    /// An aesthetic is suppressed if every layer that has the corresponding column
616    /// sets show_legend=Some(false), and no layer has it as None or Some(true).
617    fn compute_suppressed_aes(built_layers: &[BuiltLayer]) -> std::collections::HashSet<Aesthetic> {
618        use std::collections::HashSet;
619        let legend_aes = [
620            Aesthetic::Color,
621            Aesthetic::Fill,
622            Aesthetic::Shape,
623            Aesthetic::Linetype,
624            Aesthetic::Size,
625            Aesthetic::Alpha,
626        ];
627        let mut suppressed = HashSet::new();
628        for aes in &legend_aes {
629            let col_name = aes.col_name();
630            let mut any_has = false;
631            let mut all_hidden = true;
632            for bl in built_layers {
633                if bl.data.has_column(col_name) {
634                    any_has = true;
635                    match bl.show_legend {
636                        Some(false) => {} // still hidden
637                        _ => {
638                            all_hidden = false;
639                            break;
640                        }
641                    }
642                }
643            }
644            if any_has && all_hidden {
645                suppressed.insert(aes.clone());
646            }
647        }
648        suppressed
649    }
650
651    /// Detect which columns to group by for statistics.
652    /// Checks group/color/fill plus discrete x (like R's auto-grouping by discrete x).
653    fn detect_group_columns(data: &DataFrame) -> Vec<String> {
654        let candidates = ["group", "color", "fill", "x"];
655        let mut group_cols = Vec::new();
656        for &col in &candidates {
657            if data.has_column(col) {
658                if let Some(values) = data.column(col) {
659                    let is_discrete = values
660                        .iter()
661                        .any(|v| matches!(v, crate::data::Value::Str(_)));
662                    if is_discrete {
663                        group_cols.push(col.to_string());
664                    }
665                }
666            }
667        }
668        group_cols
669    }
670}