Skip to main content

ggplot_rs/
build.rs

1use crate::aes::{mapping::apply_after_stat, mapping::resolve_mappings, Aes, Aesthetic};
2use crate::annotate::Annotation;
3use crate::coord::Coord;
4use crate::data::DataFrame;
5use crate::facet::{Facet, FacetScales, Panel};
6use crate::geom::Geom;
7use crate::plot::{GGError, GGPlot, Labels, Layer};
8use crate::position::PositionParams;
9use crate::scale::ScaleSet;
10use crate::theme::Theme;
11
/// A built layer ready for rendering.
///
/// Produced by `PlotBuilder::build_layer` once a layer has been run through
/// mapping resolution, scale transformation, statistics and position adjustment.
pub struct BuiltLayer {
    /// The layer's data as left by the build pipeline (post-stat, post-position).
    pub data: DataFrame,
    /// The layer's geom, carried over from the source `Layer`.
    pub geom: Box<dyn Geom>,
    /// The layer's `show_legend` setting, carried over unchanged. Only
    /// `Some(false)` counts as "hidden" when suppressed aesthetics are computed;
    /// `None` and `Some(true)` both keep the legend.
    pub show_legend: Option<bool>,
}
18
/// A fully built plot ready for rendering.
///
/// This is the output of `PlotBuilder::build`.
pub struct BuiltPlot {
    /// Built layers, in the same order as the layers of the source plot.
    pub layers: Vec<BuiltLayer>,
    /// Scale set seeded with the user-supplied scales, trained on every built
    /// layer, with the coord's x/y zoom limits applied when the coord reports any.
    pub scales: ScaleSet,
    /// Coordinate system, taken unchanged from the source plot.
    pub coord: Box<dyn Coord>,
    /// Theme, taken unchanged from the source plot.
    pub theme: Theme,
    /// Labels, taken unchanged from the source plot.
    pub labels: Labels,
    /// Facet specification, taken unchanged from the source plot.
    pub facet: Facet,
    /// Facet panels. Empty for `Facet::None`. Panel rects are zero-sized
    /// placeholders at build time.
    pub panels: Vec<Panel>,
    /// Per-panel layer data. panels_data[panel_idx][layer_idx] = data for that panel+layer.
    /// Empty for `Facet::None`.
    pub panels_data: Vec<Vec<DataFrame>>,
    /// Annotations, taken unchanged from the source plot.
    pub annotations: Vec<Annotation>,
    /// Legend guide configuration, taken unchanged from the source plot.
    pub guide_legend: crate::guide::config::GuideLegend,
    /// Aesthetics suppressed from the legend (all layers with that aes set show_legend=false).
    pub suppressed_aes: std::collections::HashSet<Aesthetic>,
    /// Per-panel scale sets for free facets. Empty when FacetScales::Fixed
    /// or when there is no per-panel data.
    pub panel_scales: Vec<ScaleSet>,
}
37
/// The grammar pipeline: transforms a GGPlot specification into render-ready data.
///
/// This is a stateless unit struct; the pipeline is exposed through associated
/// functions (entry point: `PlotBuilder::build`).
pub struct PlotBuilder;
40
41impl PlotBuilder {
42    pub fn build(plot: GGPlot) -> Result<BuiltPlot, GGError> {
43        let GGPlot {
44            data: plot_data,
45            mapping: plot_mapping,
46            layers,
47            scales: user_scales,
48            coord,
49            theme,
50            labels,
51            facet,
52            annotations,
53            guide_legend,
54        } = plot;
55
56        let mut scale_set = ScaleSet::new();
57
58        // Add user-specified scales
59        for s in user_scales {
60            scale_set.add(s);
61        }
62
63        let mut built_layers = Vec::new();
64
65        for layer in layers {
66            let built = Self::build_layer(
67                layer,
68                &plot_data,
69                &plot_mapping,
70                &mut scale_set,
71                theme.primary,
72            )?;
73            built_layers.push(built);
74        }
75
76        // Final scale training pass across all layers
77        for bl in &built_layers {
78            scale_set.train_layer(&bl.data);
79        }
80
81        // Apply coord zoom limits (coord_cartesian xlim/ylim)
82        if let Some((min, max)) = coord.zoom_x() {
83            scale_set.set_limits(&Aesthetic::X, min, max);
84        }
85        if let Some((min, max)) = coord.zoom_y() {
86            scale_set.set_limits(&Aesthetic::Y, min, max);
87        }
88
89        // Compute facet panels
90        let (panels, panels_data) = Self::compute_facets(&facet, &built_layers, &plot_data);
91
92        // Compute suppressed aesthetics from show_legend flags.
93        let suppressed_aes = Self::compute_suppressed_aes(&built_layers);
94
95        // Compute per-panel scales for free facets
96        let facet_scales_mode = match &facet {
97            Facet::Wrap { scales, .. } => scales.clone(),
98            Facet::Grid { scales, .. } => scales.clone(),
99            Facet::None => FacetScales::Fixed,
100        };
101        let panel_scales = Self::compute_panel_scales(&facet_scales_mode, &panels_data, &scale_set);
102
103        Ok(BuiltPlot {
104            layers: built_layers,
105            scales: scale_set,
106            coord,
107            theme,
108            labels,
109            facet,
110            panels,
111            panels_data,
112            annotations,
113            guide_legend,
114            suppressed_aes,
115            panel_scales,
116        })
117    }
118
119    fn compute_facets(
120        facet: &Facet,
121        built_layers: &[BuiltLayer],
122        _plot_data: &DataFrame,
123    ) -> (Vec<Panel>, Vec<Vec<DataFrame>>) {
124        match facet {
125            Facet::None => (vec![], vec![]),
126            Facet::Wrap {
127                var,
128                ncol,
129                labeller,
130                ..
131            } => {
132                // Collect unique levels from all layers' data
133                let mut levels: Vec<String> = Vec::new();
134                for bl in built_layers {
135                    if let Some(col) = bl.data.column(var) {
136                        for v in col {
137                            let key = v.to_group_key();
138                            if !levels.contains(&key) {
139                                levels.push(key);
140                            }
141                        }
142                    }
143                }
144
145                // Panels will be positioned during rendering (depends on layout)
146                let panels: Vec<Panel> = levels
147                    .iter()
148                    .enumerate()
149                    .map(|(i, value)| {
150                        let ncols =
151                            ncol.unwrap_or_else(|| (levels.len() as f64).sqrt().ceil() as usize);
152                        let formatted = labeller.format(var, value);
153                        Panel {
154                            row: i / ncols.max(1),
155                            col: i % ncols.max(1),
156                            label: formatted.clone(),
157                            row_label: None,
158                            col_label: Some(formatted),
159                            rect: crate::render::Rect {
160                                x: 0.0,
161                                y: 0.0,
162                                width: 0.0,
163                                height: 0.0,
164                            },
165                        }
166                    })
167                    .collect();
168
169                // Split data per panel per layer
170                let panels_data: Vec<Vec<DataFrame>> = levels
171                    .iter()
172                    .map(|level| {
173                        built_layers
174                            .iter()
175                            .map(|bl| Self::filter_data_by_var(&bl.data, var, level))
176                            .collect()
177                    })
178                    .collect();
179
180                (panels, panels_data)
181            }
182            Facet::Grid {
183                row_var,
184                col_var,
185                labeller,
186                ..
187            } => {
188                let mut row_levels: Vec<String> = Vec::new();
189                let mut col_levels: Vec<String> = Vec::new();
190
191                for bl in built_layers {
192                    if let Some(rv) = row_var {
193                        if let Some(col) = bl.data.column(rv) {
194                            for v in col {
195                                let key = v.to_group_key();
196                                if !row_levels.contains(&key) {
197                                    row_levels.push(key);
198                                }
199                            }
200                        }
201                    }
202                    if let Some(cv) = col_var {
203                        if let Some(col) = bl.data.column(cv) {
204                            for v in col {
205                                let key = v.to_group_key();
206                                if !col_levels.contains(&key) {
207                                    col_levels.push(key);
208                                }
209                            }
210                        }
211                    }
212                }
213
214                if row_levels.is_empty() {
215                    row_levels.push("".to_string());
216                }
217                if col_levels.is_empty() {
218                    col_levels.push("".to_string());
219                }
220
221                let mut panels = Vec::new();
222                let mut panels_data = Vec::new();
223
224                for (ri, rl) in row_levels.iter().enumerate() {
225                    for (ci, cl) in col_levels.iter().enumerate() {
226                        let row_fmt = if rl.is_empty() {
227                            None
228                        } else {
229                            let rv = row_var.as_deref().unwrap_or("");
230                            Some(labeller.format(rv, rl))
231                        };
232                        let col_fmt = if cl.is_empty() {
233                            None
234                        } else {
235                            let cv = col_var.as_deref().unwrap_or("");
236                            Some(labeller.format(cv, cl))
237                        };
238                        let label = match (&row_fmt, &col_fmt) {
239                            (Some(r), Some(c)) => format!("{r} | {c}"),
240                            (Some(r), None) => r.clone(),
241                            (None, Some(c)) => c.clone(),
242                            (None, None) => String::new(),
243                        };
244                        panels.push(Panel {
245                            row: ri,
246                            col: ci,
247                            label,
248                            row_label: row_fmt,
249                            col_label: col_fmt,
250                            rect: crate::render::Rect {
251                                x: 0.0,
252                                y: 0.0,
253                                width: 0.0,
254                                height: 0.0,
255                            },
256                        });
257
258                        let layer_data: Vec<DataFrame> = built_layers
259                            .iter()
260                            .map(|bl| {
261                                let mut data = bl.data.clone();
262                                if let Some(rv) = row_var {
263                                    if !rl.is_empty() {
264                                        data = Self::filter_data_by_var(&data, rv, rl);
265                                    }
266                                }
267                                if let Some(cv) = col_var {
268                                    if !cl.is_empty() {
269                                        data = Self::filter_data_by_var(&data, cv, cl);
270                                    }
271                                }
272                                data
273                            })
274                            .collect();
275                        panels_data.push(layer_data);
276                    }
277                }
278
279                (panels, panels_data)
280            }
281        }
282    }
283
284    fn filter_data_by_var(data: &DataFrame, var: &str, level: &str) -> DataFrame {
285        if let Some(col) = data.column(var) {
286            let indices: Vec<usize> = col
287                .iter()
288                .enumerate()
289                .filter(|(_, v)| v.to_group_key() == level)
290                .map(|(i, _)| i)
291                .collect();
292
293            let mut result = DataFrame::new();
294            for col_name in data.column_names() {
295                if let Some(src) = data.column(col_name) {
296                    let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
297                    result.add_column(col_name.to_string(), vals);
298                }
299            }
300            result
301        } else {
302            data.clone()
303        }
304    }
305
306    fn build_layer(
307        layer: Layer,
308        plot_data: &DataFrame,
309        plot_mapping: &Aes,
310        scale_set: &mut ScaleSet,
311        primary: Option<(u8, u8, u8)>,
312    ) -> Result<BuiltLayer, GGError> {
313        let Layer {
314            data: layer_data,
315            mapping: layer_mapping,
316            mut geom,
317            stat,
318            position,
319            params: _,
320            show_legend,
321        } = layer;
322
323        // Step 1: Resolve data — use layer data if provided, else plot data
324        let source_data = layer_data.unwrap_or_else(|| plot_data.clone());
325
326        // Step 2: Merge mappings — layer overrides plot-level
327        let merged_mapping = plot_mapping.merge(&layer_mapping);
328
329        // Brand/primary color: apply to a single-series geom only when the layer
330        // maps neither color nor fill (an explicit aesthetic always wins).
331        if let Some(color) = primary {
332            let has_color = merged_mapping.get_mapping(&Aesthetic::Color).is_some();
333            let has_fill = merged_mapping.get_mapping(&Aesthetic::Fill).is_some();
334            if !has_color && !has_fill {
335                geom.set_series_color(color);
336            }
337        }
338
339        // Step 3: Evaluate aes — rename columns to canonical names
340        let mut working_data = resolve_mappings(&source_data, &merged_mapping);
341
342        // Remember which columns the user actually supplied (pre-stat). A required
343        // aesthetic is satisfied if it was present here OR is synthesized by the
344        // stat (checked after Step 6) — e.g. boxplot maps `y` then the stat turns
345        // it into ymin/ymax, while StatEcdf produces `y` that wasn't mapped.
346        let pre_stat_columns: Vec<String> = working_data
347            .column_names()
348            .iter()
349            .map(|s| s.to_string())
350            .collect();
351
352        // Step 4: Ensure scales exist for each mapped aesthetic
353        for m in &merged_mapping.mappings {
354            scale_set.ensure_scale(&m.aesthetic, &working_data);
355        }
356
357        // Step 5: Scale transformation (e.g., log10 before stats)
358        for scale in scale_set.iter() {
359            let col_name = scale.aesthetic().col_name().to_string();
360            if let Some(col) = working_data.column(&col_name) {
361                let transformed: Vec<_> = col.iter().map(|v| scale.transform(v)).collect();
362                let any_changed = transformed.iter().zip(col.iter()).any(|(t, o)| {
363                    match (t.as_f64(), o.as_f64()) {
364                        (Some(a), Some(b)) => (a - b).abs() > f64::EPSILON,
365                        _ => false,
366                    }
367                });
368                if any_changed {
369                    if let Some(col_mut) = working_data.column_mut(&col_name) {
370                        *col_mut = transformed;
371                    }
372                }
373            }
374        }
375
376        // Step 5b: Filter out-of-bounds data (xlim/ylim filter before stats)
377        Self::filter_oob_data(&mut working_data, scale_set);
378
379        // Step 6: Compute statistics
380        let group_cols = Self::detect_group_columns(&working_data);
381
382        working_data = if !group_cols.is_empty() {
383            let groups =
384                working_data.group_by(&group_cols.iter().map(|s| s.as_str()).collect::<Vec<_>>());
385            let mut result = DataFrame::new();
386            for group in groups {
387                let computed = stat.compute_group(&group, scale_set);
388                result.vstack(&computed);
389            }
390            result
391        } else {
392            stat.compute_group(&working_data, scale_set)
393        };
394
395        // Step 6a: Apply after_stat() mappings (rename stat-computed columns)
396        apply_after_stat(&mut working_data, &merged_mapping);
397
398        // Step 6a-validate: A required aesthetic must have been supplied by the
399        // user (pre-stat) or synthesized by the stat (post-stat). This lets
400        // StatEcdf produce `y` for geom_step, while boxplot — which maps `y` then
401        // consumes it into ymin/ymax — still validates. Empty input has the
402        // column in neither place, so genuinely-missing aesthetics still error.
403        for aes in &geom.required_aes() {
404            let col_name = aes.col_name();
405            let supplied = pre_stat_columns.iter().any(|c| c == col_name);
406            if !supplied && !working_data.has_column(col_name) {
407                return Err(GGError::ValidationError(format!(
408                    "geom_{} requires aesthetic '{}' but it was not provided",
409                    geom.name(),
410                    col_name
411                )));
412            }
413        }
414
415        // Step 6b: Ensure scales for stat-computed aesthetics (e.g. y from StatCount/StatBin)
416        let stat_aes = [
417            ("x", Aesthetic::X),
418            ("y", Aesthetic::Y),
419            ("xmin", Aesthetic::X),
420            ("xmax", Aesthetic::X),
421            ("ymin", Aesthetic::Y),
422            ("ymax", Aesthetic::Y),
423        ];
424        for (col, aes) in &stat_aes {
425            if working_data.has_column(col) {
426                scale_set.ensure_scale(aes, &working_data);
427            }
428        }
429
430        // Step 6c: For stat-computed Y (bars, histograms), ensure Y scale includes zero baseline
431        let y_is_user_mapped = merged_mapping.get_mapping(&Aesthetic::Y).is_some();
432        if !y_is_user_mapped && working_data.has_column("y") {
433            if let Some(y_scale) = scale_set.get_mut(&Aesthetic::Y) {
434                y_scale.train(&[crate::data::Value::Float(0.0)]);
435            }
436        }
437
438        // Step 7: Position adjustment
439        let params = PositionParams::default();
440        position.compute(&mut working_data, &params);
441
442        // Step 8: Train scales on this layer's data
443        scale_set.train_layer(&working_data);
444
445        // Step 8b: Positional scales also need to see stat-computed extent columns
446        // (e.g. boxplot/errorbar/pointrange emit ymin/ymax but no "y"). Without
447        // this the Y (or X) scale would never train on the range and collapse.
448        for (col, aes) in &stat_aes {
449            if let Some(values) = working_data.column(col) {
450                if let Some(scale) = scale_set.get_mut(aes) {
451                    scale.train(values);
452                }
453            }
454        }
455
456        Ok(BuiltLayer {
457            data: working_data,
458            geom,
459            show_legend,
460        })
461    }
462
463    /// Remove rows where x or y falls outside scale limits set via xlim/ylim.
464    fn filter_oob_data(data: &mut DataFrame, scale_set: &ScaleSet) {
465        let x_limits = scale_set.get(&Aesthetic::X).and_then(|s| s.filter_limits());
466        let y_limits = scale_set.get(&Aesthetic::Y).and_then(|s| s.filter_limits());
467
468        if x_limits.is_none() && y_limits.is_none() {
469            return;
470        }
471
472        let nrows = data.nrows();
473        let mut keep = vec![true; nrows];
474
475        if let Some((min, max)) = x_limits {
476            if let Some(col) = data.column("x") {
477                for (i, v) in col.iter().enumerate() {
478                    if let Some(f) = v.as_f64() {
479                        if f < min || f > max {
480                            keep[i] = false;
481                        }
482                    }
483                }
484            }
485        }
486
487        if let Some((min, max)) = y_limits {
488            if let Some(col) = data.column("y") {
489                for (i, v) in col.iter().enumerate() {
490                    if let Some(f) = v.as_f64() {
491                        if f < min || f > max {
492                            keep[i] = false;
493                        }
494                    }
495                }
496            }
497        }
498
499        // If nothing was filtered, skip the rebuild
500        if keep.iter().all(|&k| k) {
501            return;
502        }
503
504        let indices: Vec<usize> = keep
505            .iter()
506            .enumerate()
507            .filter(|(_, &k)| k)
508            .map(|(i, _)| i)
509            .collect();
510
511        let mut result = DataFrame::new();
512        for col_name in data.column_names() {
513            if let Some(src) = data.column(col_name) {
514                let vals: Vec<_> = indices.iter().map(|&i| src[i].clone()).collect();
515                result.add_column(col_name.to_string(), vals);
516            }
517        }
518        *data = result;
519    }
520
521    /// Compute per-panel scale sets for free facet scales.
522    /// For each panel, clones the base scale set, resets freed axes, and retrains on panel data.
523    fn compute_panel_scales(
524        facet_scales: &FacetScales,
525        panels_data: &[Vec<DataFrame>],
526        base_scales: &ScaleSet,
527    ) -> Vec<ScaleSet> {
528        if matches!(facet_scales, FacetScales::Fixed) || panels_data.is_empty() {
529            return vec![];
530        }
531
532        let free_x = matches!(facet_scales, FacetScales::FreeX | FacetScales::Free);
533        let free_y = matches!(facet_scales, FacetScales::FreeY | FacetScales::Free);
534
535        panels_data
536            .iter()
537            .map(|panel_layers| {
538                let mut panel_set = base_scales.clone();
539
540                // Reset freed axis scales
541                if free_x {
542                    if let Some(s) = panel_set.get_mut(&Aesthetic::X) {
543                        s.reset_training();
544                    }
545                }
546                if free_y {
547                    if let Some(s) = panel_set.get_mut(&Aesthetic::Y) {
548                        s.reset_training();
549                    }
550                }
551
552                // Retrain on this panel's data
553                for layer_data in panel_layers {
554                    panel_set.train_layer(layer_data);
555                }
556
557                panel_set
558            })
559            .collect()
560    }
561
562    /// Compute which aesthetics should be suppressed from the legend.
563    /// An aesthetic is suppressed if every layer that has the corresponding column
564    /// sets show_legend=Some(false), and no layer has it as None or Some(true).
565    fn compute_suppressed_aes(built_layers: &[BuiltLayer]) -> std::collections::HashSet<Aesthetic> {
566        use std::collections::HashSet;
567        let legend_aes = [
568            Aesthetic::Color,
569            Aesthetic::Fill,
570            Aesthetic::Shape,
571            Aesthetic::Linetype,
572            Aesthetic::Size,
573            Aesthetic::Alpha,
574        ];
575        let mut suppressed = HashSet::new();
576        for aes in &legend_aes {
577            let col_name = aes.col_name();
578            let mut any_has = false;
579            let mut all_hidden = true;
580            for bl in built_layers {
581                if bl.data.has_column(col_name) {
582                    any_has = true;
583                    match bl.show_legend {
584                        Some(false) => {} // still hidden
585                        _ => {
586                            all_hidden = false;
587                            break;
588                        }
589                    }
590                }
591            }
592            if any_has && all_hidden {
593                suppressed.insert(aes.clone());
594            }
595        }
596        suppressed
597    }
598
599    /// Detect which columns to group by for statistics.
600    /// Checks group/color/fill plus discrete x (like R's auto-grouping by discrete x).
601    fn detect_group_columns(data: &DataFrame) -> Vec<String> {
602        let candidates = ["group", "color", "fill", "x"];
603        let mut group_cols = Vec::new();
604        for &col in &candidates {
605            if data.has_column(col) {
606                if let Some(values) = data.column(col) {
607                    let is_discrete = values
608                        .iter()
609                        .any(|v| matches!(v, crate::data::Value::Str(_)));
610                    if is_discrete {
611                        group_cols.push(col.to_string());
612                    }
613                }
614            }
615        }
616        group_cols
617    }
618}