charton 0.5.0

A high-performance, layered charting system for Rust, featuring a flexible data core and multi-backend rendering.
Documentation
use crate::TEMP_SUFFIX;
use crate::chart::Chart;
use crate::core::data::{ColumnVector, Dataset};
use crate::error::ChartonError;
use crate::mark::Mark;
use ahash::AHashMap;

impl<T: Mark> Chart<T> {
    /// Bins the X field and replaces the chart's dataset with one row per
    /// `(bin, color)` combination.
    ///
    /// The rebuilt dataset contains:
    /// * the X field, holding the midpoint of each bin,
    /// * the Y field, holding the row count of that bin (or its share of the
    ///   group total when the Y encoding has `normalize` set),
    /// * the color field, only when a color encoding is present.
    ///
    /// Rows are emitted bin by bin in ascending order; inside a bin the colors
    /// follow the order returned by `unique_values()`. Combinations without
    /// any matching row are emitted with a count of `0.0`, so every bin has an
    /// entry for every color.
    ///
    /// Notes on edge cases:
    /// * Rows for which `get_f64` yields `None` are binned as if they held the
    ///   column minimum, i.e. they land in the first bin.
    /// * When the computed bin width is not a positive finite number (for
    ///   example a constant column), a width of `1.0` is used so the bin index
    ///   arithmetic stays well-defined.
    /// * A color label that is not part of `unique_values()` is counted but
    ///   never emitted, because only the labels in that list are written out.
    ///
    /// # Errors
    ///
    /// Returns `ChartonError::Encoding` when the X or Y encoding is missing or
    /// when the X encoding requests zero bins. Errors from column lookup and
    /// from `Dataset::add_column` are propagated unchanged.
    pub(crate) fn transform_histogram_data(mut self) -> Result<Self, ChartonError> {
        // --- STEP 1: Extract Encodings ---
        let x_enc = self
            .encoding
            .x
            .as_ref()
            .ok_or_else(|| ChartonError::Encoding("X missing".into()))?;
        let y_enc = self
            .encoding
            .y
            .as_ref()
            .ok_or_else(|| ChartonError::Encoding("Y missing".into()))?;
        let color_enc = self.encoding.color.as_ref();

        let bin_field = &x_enc.field;
        let count_field = &y_enc.field;

        // --- STEP 2: Calculate Binning Parameters ---
        let x_col = self.data.column(bin_field)?;
        let (min_val, max_val) = x_col.min_max();
        let n_bins = x_enc.bins.unwrap_or(10);

        // Zero bins would underflow `n_bins - 1` below; reject it explicitly.
        if n_bins == 0 {
            return Err(ChartonError::Encoding(
                "Histogram requires at least one bin".into(),
            ));
        }
        let last_bin = n_bins - 1;

        // Use the real span for every bin count (including a single bin) and
        // fall back to a unit width only when the span is unusable.
        let raw_width = (max_val - min_val) / (n_bins as f64);
        let bin_width = if raw_width.is_finite() && raw_width > 0.0 {
            raw_width
        } else {
            1.0
        };

        // Maps a row to its bin. The maximum value sits exactly on the upper
        // edge of the range, so the index is clamped into the last bin.
        let bin_of = |row: usize| -> usize {
            let val = x_col.get_f64(row).unwrap_or(min_val);
            (((val - min_val) / bin_width).floor() as usize).min(last_bin)
        };

        // --- STEP 3: Establish Deterministic Order for Color ---
        // Without a color encoding a single placeholder group is used.
        let default_label = format!("{}_default", TEMP_SUFFIX);
        let color_list: Vec<String> = match color_enc {
            Some(c_enc) => self.data.column(&c_enc.field)?.unique_values(),
            None => vec![default_label.clone()],
        };

        // --- STEP 4: Aggregate Counts (The "Group By" phase) ---
        // Key: color label, Value: one counter per bin.
        let mut counts: AHashMap<String, Vec<f64>> = AHashMap::new();
        let row_count = self.data.height();

        match color_enc {
            Some(c_enc) => {
                for row in 0..row_count {
                    let bin_idx = bin_of(row);
                    let label = self.data.get_str_or(&c_enc.field, row, "null");
                    let tally = counts.entry(label).or_insert_with(|| vec![0.0; n_bins]);
                    tally[bin_idx] += 1.0;
                }
            }
            None => {
                // Single group: count into a local vector, no per-row label.
                let mut tally = vec![0.0; n_bins];
                for row in 0..row_count {
                    tally[bin_of(row)] += 1.0;
                }
                counts.insert(default_label, tally);
            }
        }

        // --- STEP 5: Apply Normalization (Optional) ---
        // Each group is scaled so its bins sum to 1.0. Without a color
        // encoding there is exactly one group, which makes this the global
        // normalization.
        if y_enc.normalize {
            for tally in counts.values_mut() {
                let total: f64 = tally.iter().sum();
                if total > 0.0 {
                    for count in tally.iter_mut() {
                        *count /= total;
                    }
                }
            }
        }

        // --- STEP 6: Cartesian Product & Gap Filling (Using the established order) ---
        // Resolve each color's counters once instead of once per bin.
        let per_color: Vec<Option<&Vec<f64>>> = color_list
            .iter()
            .map(|color| counts.get(color.as_str()))
            .collect();

        let has_color = color_enc.is_some();
        let out_len = n_bins * color_list.len();
        let mut final_x = Vec::with_capacity(out_len);
        let mut final_y = Vec::with_capacity(out_len);
        let mut final_color = Vec::with_capacity(if has_color { out_len } else { 0 });

        for bin_idx in 0..n_bins {
            // Bin midpoint (natural numeric order for the X axis).
            let mid = min_val + (bin_idx as f64 + 0.5) * bin_width;

            for (color, tally) in color_list.iter().zip(per_color.iter().copied()) {
                // Missing combinations are filled with a zero count.
                let count = tally.map_or(0.0, |t| t[bin_idx]);

                final_x.push(mid);
                final_y.push(count);

                if has_color {
                    final_color.push(color.clone());
                }
            }
        }

        // --- STEP 7: Rebuild Dataset ---
        let mut new_ds = Dataset::new();
        new_ds.add_column(bin_field, ColumnVector::F64 { data: final_x })?;
        new_ds.add_column(count_field, ColumnVector::F64 { data: final_y })?;

        if let Some(c_enc) = color_enc {
            new_ds.add_column(
                &c_enc.field,
                ColumnVector::String {
                    data: final_color,
                    validity: None, // Cartesian product ensures every slot is filled with a valid String
                },
            )?;
        }

        self.data = new_ds;
        Ok(self)
    }
}