Skip to main content

ggplot_rs/
build.rs

1use crate::aes::{mapping::apply_after_stat, mapping::resolve_mappings, Aes, Aesthetic};
2use crate::annotate::Annotation;
3use crate::coord::Coord;
4use crate::data::DataFrame;
5use crate::facet::{Facet, FacetScales, Panel};
6use crate::geom::Geom;
7use crate::plot::{GGError, GGPlot, Labels, Layer};
8use crate::position::PositionParams;
9use crate::scale::ScaleSet;
10use crate::theme::Theme;
11
/// A built layer ready for rendering.
///
/// Produced by `PlotBuilder::build_layer` from a `Layer` specification.
pub struct BuiltLayer {
    /// The layer's data after the build pipeline has run over it.
    pub data: DataFrame,
    /// The geom carried over from the source `Layer`.
    pub geom: Box<dyn Geom>,
    /// Legend flag carried over from the source `Layer`. Only `Some(false)`
    /// counts as "hidden" when suppressed aesthetics are computed.
    pub show_legend: Option<bool>,
}
18
/// A fully built plot ready for rendering.
///
/// This is the output of `PlotBuilder::build`.
pub struct BuiltPlot {
    /// One built layer per layer of the plot specification, in the same order.
    pub layers: Vec<BuiltLayer>,
    /// The plot-wide scale set, trained on every layer's data.
    pub scales: ScaleSet,
    /// The coordinate system, after it has been given the trained x/y axis spans.
    pub coord: Box<dyn Coord>,
    /// Theme passed through unchanged from the plot specification.
    pub theme: Theme,
    /// Labels passed through unchanged from the plot specification.
    pub labels: Labels,
    /// Facet specification passed through unchanged from the plot specification.
    pub facet: Facet,
    /// Facet panels. Empty when the plot is not faceted (`Facet::None`).
    pub panels: Vec<Panel>,
    /// Per-panel layer data. panels_data[panel_idx][layer_idx] = data for that panel+layer.
    pub panels_data: Vec<Vec<DataFrame>>,
    /// Annotations passed through unchanged from the plot specification.
    pub annotations: Vec<Annotation>,
    /// Legend guide configuration passed through unchanged from the plot specification.
    pub guide_legend: crate::guide::config::GuideLegend,
    /// Aesthetics suppressed from the legend (all layers with that aes set show_legend=false).
    pub suppressed_aes: std::collections::HashSet<Aesthetic>,
    /// Per-panel scale sets for free facets. Empty when FacetScales::Fixed.
    pub panel_scales: Vec<ScaleSet>,
}
37
/// The grammar pipeline: transforms a GGPlot specification into render-ready data.
///
/// `PlotBuilder` is a stateless namespace for the build functions; it carries
/// no data, so the common value traits are derived for free.
#[derive(Debug, Clone, Copy, Default)]
pub struct PlotBuilder;
40
41impl PlotBuilder {
42    pub fn build(plot: GGPlot) -> Result<BuiltPlot, GGError> {
43        let GGPlot {
44            data: plot_data,
45            mapping: plot_mapping,
46            layers,
47            scales: user_scales,
48            mut coord,
49            theme,
50            labels,
51            facet,
52            annotations,
53            guide_legend,
54        } = plot;
55
56        let mut scale_set = ScaleSet::new();
57
58        // Add user-specified scales
59        for s in user_scales {
60            scale_set.add(s);
61        }
62
63        let mut built_layers = Vec::new();
64
65        // Faceting variables — used to group stat computation per panel so a
66        // computed stat (density/histogram) is estimated per panel, not pooled.
67        let facet_vars = Self::facet_vars(&facet);
68
69        for layer in layers {
70            let built = Self::build_layer(
71                layer,
72                &plot_data,
73                &plot_mapping,
74                &mut scale_set,
75                theme.primary,
76                &facet_vars,
77            )?;
78            built_layers.push(built);
79        }
80
81        // Final scale training pass across all layers
82        for bl in &built_layers {
83            scale_set.train_layer(&bl.data);
84        }
85
86        // Apply coord zoom limits (coord_cartesian xlim/ylim)
87        if let Some((min, max)) = coord.zoom_x() {
88            scale_set.set_limits(&Aesthetic::X, min, max);
89        }
90        if let Some((min, max)) = coord.zoom_y() {
91            scale_set.set_limits(&Aesthetic::Y, min, max);
92        }
93
94        // Supply trained axis spans to the coordinate system (used by coord_trans).
95        // pmin/pmax are the panel positions of the domain endpoints, so the coord
96        // can invert the scale's (linearly expanded) mapping exactly.
97        let axis_span = |aes: &Aesthetic| {
98            scale_set.get(aes).and_then(|s| {
99                s.domain().map(|(min, max)| crate::coord::AxisSpan {
100                    min,
101                    max,
102                    pmin: s.map(&crate::data::Value::Float(min)),
103                    pmax: s.map(&crate::data::Value::Float(max)),
104                })
105            })
106        };
107        let x_span = axis_span(&Aesthetic::X);
108        let y_span = axis_span(&Aesthetic::Y);
109        coord.set_domains(x_span, y_span);
110
111        // Apply after_scale() color derivations: copy the source aesthetic's
112        // column to the target and register a lightness-modified clone of the
113        // source scale, so the target aesthetic draws the mapped source color
114        // adjusted in lightness (e.g. a darker border derived from the fill).
115        for spec in &plot_mapping.after_scale {
116            if let Some(src_scale) = scale_set.get(&spec.source) {
117                let modified = crate::scale::modified::ScaleColorModified::new(
118                    src_scale.clone_box(),
119                    spec.target.clone(),
120                    spec.lightness,
121                );
122                let (src_col, tgt_col) = (spec.source.col_name(), spec.target.col_name());
123                for bl in &mut built_layers {
124                    if !bl.data.has_column(tgt_col) {
125                        if let Some(vals) = bl.data.column(src_col) {
126                            let vals = vals.to_vec();
127                            bl.data.add_column(tgt_col.to_string(), vals);
128                        }
129                    }
130                }
131                scale_set.add(Box::new(modified));
132            }
133        }
134
135        // Compute facet panels
136        let (panels, panels_data) = Self::compute_facets(&facet, &built_layers, &plot_data);
137
138        // Compute suppressed aesthetics from show_legend flags.
139        let suppressed_aes = Self::compute_suppressed_aes(&built_layers);
140
141        // Compute per-panel scales for free facets
142        let facet_scales_mode = match &facet {
143            Facet::Wrap { scales, .. } => scales.clone(),
144            Facet::Grid { scales, .. } => scales.clone(),
145            Facet::None => FacetScales::Fixed,
146        };
147        let panel_scales = Self::compute_panel_scales(&facet_scales_mode, &panels_data, &scale_set);
148
149        Ok(BuiltPlot {
150            layers: built_layers,
151            scales: scale_set,
152            coord,
153            theme,
154            labels,
155            facet,
156            panels,
157            panels_data,
158            annotations,
159            guide_legend,
160            suppressed_aes,
161            panel_scales,
162        })
163    }
164
165    /// The column name(s) a facet splits on, if any.
166    fn facet_vars(facet: &Facet) -> Vec<String> {
167        match facet {
168            Facet::None => vec![],
169            Facet::Wrap { var, .. } => vec![var.clone()],
170            Facet::Grid {
171                row_var, col_var, ..
172            } => row_var.iter().chain(col_var.iter()).cloned().collect(),
173        }
174    }
175
176    fn compute_facets(
177        facet: &Facet,
178        built_layers: &[BuiltLayer],
179        _plot_data: &DataFrame,
180    ) -> (Vec<Panel>, Vec<Vec<DataFrame>>) {
181        match facet {
182            Facet::None => (vec![], vec![]),
183            Facet::Wrap {
184                var,
185                ncol,
186                labeller,
187                ..
188            } => {
189                // Collect unique levels from all layers' data
190                let mut levels: Vec<String> = Vec::new();
191                for bl in built_layers {
192                    if let Some(col) = bl.data.column(var) {
193                        for v in col {
194                            let key = v.to_group_key();
195                            if !levels.contains(&key) {
196                                levels.push(key);
197                            }
198                        }
199                    }
200                }
201
202                // Panels will be positioned during rendering (depends on layout)
203                let panels: Vec<Panel> = levels
204                    .iter()
205                    .enumerate()
206                    .map(|(i, value)| {
207                        let ncols =
208                            ncol.unwrap_or_else(|| (levels.len() as f64).sqrt().ceil() as usize);
209                        let formatted = labeller.format(var, value);
210                        Panel {
211                            row: i / ncols.max(1),
212                            col: i % ncols.max(1),
213                            label: formatted.clone(),
214                            row_label: None,
215                            col_label: Some(formatted),
216                            rect: crate::render::Rect {
217                                x: 0.0,
218                                y: 0.0,
219                                width: 0.0,
220                                height: 0.0,
221                            },
222                        }
223                    })
224                    .collect();
225
226                // Split data per panel per layer
227                let panels_data: Vec<Vec<DataFrame>> = levels
228                    .iter()
229                    .map(|level| {
230                        built_layers
231                            .iter()
232                            .map(|bl| Self::filter_data_by_var(&bl.data, var, level))
233                            .collect()
234                    })
235                    .collect();
236
237                (panels, panels_data)
238            }
239            Facet::Grid {
240                row_var,
241                col_var,
242                labeller,
243                ..
244            } => {
245                let mut row_levels: Vec<String> = Vec::new();
246                let mut col_levels: Vec<String> = Vec::new();
247
248                for bl in built_layers {
249                    if let Some(rv) = row_var {
250                        if let Some(col) = bl.data.column(rv) {
251                            for v in col {
252                                let key = v.to_group_key();
253                                if !row_levels.contains(&key) {
254                                    row_levels.push(key);
255                                }
256                            }
257                        }
258                    }
259                    if let Some(cv) = col_var {
260                        if let Some(col) = bl.data.column(cv) {
261                            for v in col {
262                                let key = v.to_group_key();
263                                if !col_levels.contains(&key) {
264                                    col_levels.push(key);
265                                }
266                            }
267                        }
268                    }
269                }
270
271                if row_levels.is_empty() {
272                    row_levels.push("".to_string());
273                }
274                if col_levels.is_empty() {
275                    col_levels.push("".to_string());
276                }
277
278                let mut panels = Vec::new();
279                let mut panels_data = Vec::new();
280
281                for (ri, rl) in row_levels.iter().enumerate() {
282                    for (ci, cl) in col_levels.iter().enumerate() {
283                        let row_fmt = if rl.is_empty() {
284                            None
285                        } else {
286                            let rv = row_var.as_deref().unwrap_or("");
287                            Some(labeller.format(rv, rl))
288                        };
289                        let col_fmt = if cl.is_empty() {
290                            None
291                        } else {
292                            let cv = col_var.as_deref().unwrap_or("");
293                            Some(labeller.format(cv, cl))
294                        };
295                        let label = match (&row_fmt, &col_fmt) {
296                            (Some(r), Some(c)) => format!("{r} | {c}"),
297                            (Some(r), None) => r.clone(),
298                            (None, Some(c)) => c.clone(),
299                            (None, None) => String::new(),
300                        };
301                        panels.push(Panel {
302                            row: ri,
303                            col: ci,
304                            label,
305                            row_label: row_fmt,
306                            col_label: col_fmt,
307                            rect: crate::render::Rect {
308                                x: 0.0,
309                                y: 0.0,
310                                width: 0.0,
311                                height: 0.0,
312                            },
313                        });
314
315                        let layer_data: Vec<DataFrame> = built_layers
316                            .iter()
317                            .map(|bl| {
318                                let mut data = bl.data.clone();
319                                if let Some(rv) = row_var {
320                                    if !rl.is_empty() {
321                                        data = Self::filter_data_by_var(&data, rv, rl);
322                                    }
323                                }
324                                if let Some(cv) = col_var {
325                                    if !cl.is_empty() {
326                                        data = Self::filter_data_by_var(&data, cv, cl);
327                                    }
328                                }
329                                data
330                            })
331                            .collect();
332                        panels_data.push(layer_data);
333                    }
334                }
335
336                (panels, panels_data)
337            }
338        }
339    }
340
341    fn filter_data_by_var(data: &DataFrame, var: &str, level: &str) -> DataFrame {
342        if let Some(col) = data.column(var) {
343            let indices: Vec<usize> = col
344                .iter()
345                .enumerate()
346                .filter(|(_, v)| v.to_group_key() == level)
347                .map(|(i, _)| i)
348                .collect();
349
350            let mut result = DataFrame::new();
351            for col_name in data.column_names() {
352                if let Some(src) = data.column(col_name) {
353                    let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
354                    result.add_column(col_name.to_string(), vals);
355                }
356            }
357            result
358        } else {
359            data.clone()
360        }
361    }
362
363    fn build_layer(
364        layer: Layer,
365        plot_data: &DataFrame,
366        plot_mapping: &Aes,
367        scale_set: &mut ScaleSet,
368        primary: Option<(u8, u8, u8)>,
369        facet_vars: &[String],
370    ) -> Result<BuiltLayer, GGError> {
371        let Layer {
372            data: layer_data,
373            mapping: layer_mapping,
374            mut geom,
375            stat,
376            position,
377            params: _,
378            show_legend,
379        } = layer;
380
381        // Step 1: Resolve data — use layer data if provided, else plot data
382        let source_data = layer_data.unwrap_or_else(|| plot_data.clone());
383
384        // Step 2: Merge mappings — layer overrides plot-level
385        let merged_mapping = plot_mapping.merge(&layer_mapping);
386
387        // Brand/primary color: apply to a single-series geom only when the layer
388        // maps neither color nor fill (an explicit aesthetic always wins).
389        if let Some(color) = primary {
390            let has_color = merged_mapping.get_mapping(&Aesthetic::Color).is_some();
391            let has_fill = merged_mapping.get_mapping(&Aesthetic::Fill).is_some();
392            if !has_color && !has_fill {
393                geom.set_series_color(color);
394            }
395        }
396
397        // Step 3: Evaluate aes — rename columns to canonical names
398        let mut working_data = resolve_mappings(&source_data, &merged_mapping);
399
400        // Remember which columns the user actually supplied (pre-stat). A required
401        // aesthetic is satisfied if it was present here OR is synthesized by the
402        // stat (checked after Step 6) — e.g. boxplot maps `y` then the stat turns
403        // it into ymin/ymax, while StatEcdf produces `y` that wasn't mapped.
404        let pre_stat_columns: Vec<String> = working_data
405            .column_names()
406            .iter()
407            .map(|s| s.to_string())
408            .collect();
409
410        // Step 4: Ensure scales exist for each mapped aesthetic
411        for m in &merged_mapping.mappings {
412            scale_set.ensure_scale(&m.aesthetic, &working_data);
413        }
414
415        // Step 5: Scale transformation (e.g., log10 before stats)
416        for scale in scale_set.iter() {
417            let col_name = scale.aesthetic().col_name().to_string();
418            if let Some(col) = working_data.column(&col_name) {
419                let transformed: Vec<_> = col.iter().map(|v| scale.transform(v)).collect();
420                let any_changed = transformed.iter().zip(col.iter()).any(|(t, o)| {
421                    match (t.as_f64(), o.as_f64()) {
422                        (Some(a), Some(b)) => (a - b).abs() > f64::EPSILON,
423                        _ => false,
424                    }
425                });
426                if any_changed {
427                    if let Some(col_mut) = working_data.column_mut(&col_name) {
428                        *col_mut = transformed;
429                    }
430                }
431            }
432        }
433
434        // Step 5b: Filter out-of-bounds data (xlim/ylim filter before stats)
435        Self::filter_oob_data(&mut working_data, scale_set);
436
437        // Step 6: Compute statistics. Group by aesthetic groups AND the facet
438        // variables, so a computed stat (density/histogram/…) is estimated per
439        // panel rather than on pooled data; the facet column is then re-attached
440        // to each group's output so faceting can split it back out.
441        let mut group_cols = Self::detect_group_columns(&working_data);
442        for fv in facet_vars {
443            if working_data.has_column(fv) && !group_cols.contains(fv) {
444                group_cols.push(fv.clone());
445            }
446        }
447
448        working_data = if !group_cols.is_empty() {
449            let groups =
450                working_data.group_by(&group_cols.iter().map(|s| s.as_str()).collect::<Vec<_>>());
451            let mut result = DataFrame::new();
452            for group in groups {
453                let mut computed = stat.compute_group(&group, scale_set);
454                let n = computed.nrows();
455                if n > 0 {
456                    for fv in facet_vars {
457                        if !computed.has_column(fv) {
458                            if let Some(val) = group.column(fv).and_then(|c| c.first()).cloned() {
459                                computed.add_column(fv.clone(), vec![val; n]);
460                            }
461                        }
462                    }
463                }
464                result.vstack(&computed);
465            }
466            result
467        } else {
468            stat.compute_group(&working_data, scale_set)
469        };
470
471        // Step 6a: Apply after_stat() mappings (rename stat-computed columns)
472        apply_after_stat(&mut working_data, &merged_mapping);
473
474        // Step 6a-validate: A required aesthetic must have been supplied by the
475        // user (pre-stat) or synthesized by the stat (post-stat). This lets
476        // StatEcdf produce `y` for geom_step, while boxplot — which maps `y` then
477        // consumes it into ymin/ymax — still validates. Empty input has the
478        // column in neither place, so genuinely-missing aesthetics still error.
479        for aes in &geom.required_aes() {
480            let col_name = aes.col_name();
481            let supplied = pre_stat_columns.iter().any(|c| c == col_name);
482            if !supplied && !working_data.has_column(col_name) {
483                return Err(GGError::ValidationError(format!(
484                    "geom_{} requires aesthetic '{}' but it was not provided",
485                    geom.name(),
486                    col_name
487                )));
488            }
489        }
490
491        // Step 6b: Ensure scales for stat-computed aesthetics (e.g. y from StatCount/StatBin)
492        let stat_aes = [
493            ("x", Aesthetic::X),
494            ("y", Aesthetic::Y),
495            ("xmin", Aesthetic::X),
496            ("xmax", Aesthetic::X),
497            ("ymin", Aesthetic::Y),
498            ("ymax", Aesthetic::Y),
499        ];
500        for (col, aes) in &stat_aes {
501            if working_data.has_column(col) {
502                scale_set.ensure_scale(aes, &working_data);
503            }
504        }
505
506        // Step 6c: For stat-computed Y (bars, histograms), ensure Y scale includes zero baseline
507        let y_is_user_mapped = merged_mapping.get_mapping(&Aesthetic::Y).is_some();
508        if !y_is_user_mapped && working_data.has_column("y") {
509            if let Some(y_scale) = scale_set.get_mut(&Aesthetic::Y) {
510                y_scale.train(&[crate::data::Value::Float(0.0)]);
511            }
512        }
513
514        // Step 7: Position adjustment
515        let params = PositionParams::default();
516        position.compute(&mut working_data, &params);
517
518        // Step 8: Train scales on this layer's data
519        scale_set.train_layer(&working_data);
520
521        // Step 8b: Positional scales also need to see stat-computed extent columns
522        // (e.g. boxplot/errorbar/pointrange emit ymin/ymax but no "y"). Without
523        // this the Y (or X) scale would never train on the range and collapse.
524        for (col, aes) in &stat_aes {
525            if let Some(values) = working_data.column(col) {
526                if let Some(scale) = scale_set.get_mut(aes) {
527                    scale.train(values);
528                }
529            }
530        }
531
532        Ok(BuiltLayer {
533            data: working_data,
534            geom,
535            show_legend,
536        })
537    }
538
539    /// Remove rows where x or y falls outside scale limits set via xlim/ylim.
540    fn filter_oob_data(data: &mut DataFrame, scale_set: &ScaleSet) {
541        let x_limits = scale_set.get(&Aesthetic::X).and_then(|s| s.filter_limits());
542        let y_limits = scale_set.get(&Aesthetic::Y).and_then(|s| s.filter_limits());
543
544        if x_limits.is_none() && y_limits.is_none() {
545            return;
546        }
547
548        let nrows = data.nrows();
549        let mut keep = vec![true; nrows];
550
551        if let Some((min, max)) = x_limits {
552            if let Some(col) = data.column("x") {
553                for (i, v) in col.iter().enumerate() {
554                    if let Some(f) = v.as_f64() {
555                        if f < min || f > max {
556                            keep[i] = false;
557                        }
558                    }
559                }
560            }
561        }
562
563        if let Some((min, max)) = y_limits {
564            if let Some(col) = data.column("y") {
565                for (i, v) in col.iter().enumerate() {
566                    if let Some(f) = v.as_f64() {
567                        if f < min || f > max {
568                            keep[i] = false;
569                        }
570                    }
571                }
572            }
573        }
574
575        // If nothing was filtered, skip the rebuild
576        if keep.iter().all(|&k| k) {
577            return;
578        }
579
580        let indices: Vec<usize> = keep
581            .iter()
582            .enumerate()
583            .filter(|(_, &k)| k)
584            .map(|(i, _)| i)
585            .collect();
586
587        let mut result = DataFrame::new();
588        for col_name in data.column_names() {
589            if let Some(src) = data.column(col_name) {
590                let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
591                result.add_column(col_name.to_string(), vals);
592            }
593        }
594        *data = result;
595    }
596
597    /// Compute per-panel scale sets for free facet scales.
598    /// For each panel, clones the base scale set, resets freed axes, and retrains on panel data.
599    fn compute_panel_scales(
600        facet_scales: &FacetScales,
601        panels_data: &[Vec<DataFrame>],
602        base_scales: &ScaleSet,
603    ) -> Vec<ScaleSet> {
604        if matches!(facet_scales, FacetScales::Fixed) || panels_data.is_empty() {
605            return vec![];
606        }
607
608        let free_x = matches!(facet_scales, FacetScales::FreeX | FacetScales::Free);
609        let free_y = matches!(facet_scales, FacetScales::FreeY | FacetScales::Free);
610
611        panels_data
612            .iter()
613            .map(|panel_layers| {
614                let mut panel_set = base_scales.clone();
615
616                // Reset freed axis scales
617                if free_x {
618                    if let Some(s) = panel_set.get_mut(&Aesthetic::X) {
619                        s.reset_training();
620                    }
621                }
622                if free_y {
623                    if let Some(s) = panel_set.get_mut(&Aesthetic::Y) {
624                        s.reset_training();
625                    }
626                }
627
628                // Retrain on this panel's data
629                for layer_data in panel_layers {
630                    panel_set.train_layer(layer_data);
631                }
632
633                panel_set
634            })
635            .collect()
636    }
637
638    /// Compute which aesthetics should be suppressed from the legend.
639    /// An aesthetic is suppressed if every layer that has the corresponding column
640    /// sets show_legend=Some(false), and no layer has it as None or Some(true).
641    fn compute_suppressed_aes(built_layers: &[BuiltLayer]) -> std::collections::HashSet<Aesthetic> {
642        use std::collections::HashSet;
643        let legend_aes = [
644            Aesthetic::Color,
645            Aesthetic::Fill,
646            Aesthetic::Shape,
647            Aesthetic::Linetype,
648            Aesthetic::Size,
649            Aesthetic::Alpha,
650        ];
651        let mut suppressed = HashSet::new();
652        for aes in &legend_aes {
653            let col_name = aes.col_name();
654            let mut any_has = false;
655            let mut all_hidden = true;
656            for bl in built_layers {
657                if bl.data.has_column(col_name) {
658                    any_has = true;
659                    match bl.show_legend {
660                        Some(false) => {} // still hidden
661                        _ => {
662                            all_hidden = false;
663                            break;
664                        }
665                    }
666                }
667            }
668            if any_has && all_hidden {
669                suppressed.insert(aes.clone());
670            }
671        }
672        suppressed
673    }
674
675    /// Detect which columns to group by for statistics.
676    /// Checks group/color/fill plus discrete x (like R's auto-grouping by discrete x).
677    fn detect_group_columns(data: &DataFrame) -> Vec<String> {
678        let candidates = ["group", "color", "fill", "x"];
679        let mut group_cols = Vec::new();
680        for &col in &candidates {
681            if data.has_column(col) {
682                if let Some(values) = data.column(col) {
683                    let is_discrete = values
684                        .iter()
685                        .any(|v| matches!(v, crate::data::Value::Str(_)));
686                    if is_discrete {
687                        group_cols.push(col.to_string());
688                    }
689                }
690            }
691        }
692        group_cols
693    }
694}