use crate::TEMP_SUFFIX;
use crate::chart::Chart;
use crate::core::data::{ColumnVector, Dataset};
use crate::error::ChartonError;
use crate::mark::Mark;
use ahash::AHashMap;
impl<T: Mark> Chart<T> {
    /// Replaces the chart's dataset with binned histogram counts.
    ///
    /// The x-encoded column is split into equal-width bins spanning the
    /// column's `min_max()` range (10 bins unless the x encoding specifies
    /// `bins`). The resulting dataset has one row per (bin, color) pair,
    /// ordered bin-major, with these columns:
    ///
    /// * the x field: the center of each bin,
    /// * the y field: the number of rows in that bin, or the fraction of the
    ///   color group's rows when the y encoding has `normalize` set,
    /// * the color field (only when a color encoding is present).
    ///
    /// Bins with no rows are emitted with a count of `0.0`.
    ///
    /// # Errors
    ///
    /// Returns `ChartonError::Encoding` when the x or y encoding is missing,
    /// and propagates any error from column lookup or from adding columns to
    /// the new dataset.
    pub(crate) fn transform_histogram_data(mut self) -> Result<Self, ChartonError> {
        let x_enc = self
            .encoding
            .x
            .as_ref()
            .ok_or_else(|| ChartonError::Encoding("X missing".into()))?;
        let y_enc = self
            .encoding
            .y
            .as_ref()
            .ok_or_else(|| ChartonError::Encoding("Y missing".into()))?;
        let color_enc = self.encoding.color.as_ref();

        let bin_field = &x_enc.field;
        let count_field = &y_enc.field;

        let x_col = self.data.column(bin_field)?;
        let (min_val, max_val) = x_col.min_max();

        // A bin count of zero would underflow `n_bins - 1` below, so at least
        // one bin is always used.
        let n_bins = x_enc.bins.unwrap_or(10).max(1);

        // A zero or non-finite range (all values equal, or no usable values)
        // would give a zero/NaN width, which collapses every bin center onto
        // the same x and makes the bin index computation divide by zero.
        // Fall back to the same unit width used for the single-bin case.
        let raw_width = (max_val - min_val) / (n_bins as f64);
        let bin_width = if n_bins > 1 && raw_width.is_finite() && raw_width > 0.0 {
            raw_width
        } else {
            1.0
        };

        let bin_middles: Vec<f64> = (0..n_bins)
            .map(|i| min_val + (i as f64 + 0.5) * bin_width)
            .collect();

        // Without a color encoding every row belongs to one synthetic group.
        let color_list: Vec<String> = match color_enc {
            Some(c_enc) => self.data.column(&c_enc.field)?.unique_values(),
            None => vec![format!("{}_default", TEMP_SUFFIX)],
        };

        // Maps a row to its bin. Missing x values are treated as `min_val`
        // and therefore land in the first bin; the maximum value is clamped
        // into the last bin instead of opening an extra one.
        let bin_of = |row: usize| -> usize {
            let val = x_col.get_f64(row).unwrap_or(min_val);
            (((val - min_val) / bin_width).floor() as usize).min(n_bins - 1)
        };

        // Per-color dense counts, indexed by bin.
        let mut counts: AHashMap<String, Vec<f64>> = AHashMap::new();
        let row_count = self.data.height();
        match color_enc {
            Some(c_enc) => {
                for row in 0..row_count {
                    let bin_idx = bin_of(row);
                    // Rows with a missing color value are labelled "null".
                    let label = self.data.get_str_or(&c_enc.field, row, "null");
                    counts.entry(label).or_insert_with(|| vec![0.0; n_bins])[bin_idx] += 1.0;
                }
            }
            None => {
                let mut ungrouped = vec![0.0; n_bins];
                for row in 0..row_count {
                    ungrouped[bin_of(row)] += 1.0;
                }
                counts.insert(format!("{}_default", TEMP_SUFFIX), ungrouped);
            }
        }

        // Normalize each group so its bins sum to 1. With no color encoding
        // there is a single group, so this is the overall total.
        if y_enc.normalize {
            for per_bin in counts.values_mut() {
                let total: f64 = per_bin.iter().sum();
                if total > 0.0 {
                    for count in per_bin.iter_mut() {
                        *count /= total;
                    }
                }
            }
        }

        let out_rows = n_bins * color_list.len();
        let mut final_x = Vec::with_capacity(out_rows);
        let mut final_y = Vec::with_capacity(out_rows);
        let mut final_color: Vec<String> = Vec::new();
        if color_enc.is_some() {
            final_color.reserve(out_rows);
        }

        for (bin_idx, &mid) in bin_middles.iter().enumerate() {
            for color in &color_list {
                let count = counts
                    .get(color.as_str())
                    .map_or(0.0, |per_bin| per_bin[bin_idx]);
                final_x.push(mid);
                final_y.push(count);
                if color_enc.is_some() {
                    final_color.push(color.clone());
                }
            }
        }

        let mut new_ds = Dataset::new();
        new_ds.add_column(
            bin_field,
            ColumnVector::Float64 {
                data: final_x,
                validity: None,
            },
        )?;
        new_ds.add_column(
            count_field,
            ColumnVector::Float64 {
                data: final_y,
                validity: None,
            },
        )?;
        if let Some(c_enc) = color_enc {
            new_ds.add_column(
                &c_enc.field,
                ColumnVector::String {
                    data: final_color,
                    validity: None,
                },
            )?;
        }

        self.data = new_ds;
        Ok(self)
    }
}