use super::{
compute_dodge_offsets, compute_group_indices, is_continuous_scale, non_facet_partition_cols,
Layer, PositionTrait, PositionType,
};
use crate::array_util::{as_f64, cast_array, new_f64_array_non_null};
use crate::plot::layer::geom::types::SIDE_VALUES;
use crate::plot::types::{DefaultParamValue, ParamConstraint, ParamDefinition, ParameterValue};
use crate::{naming, DataFrame, GgsqlError, Plot, Result};
use arrow::array::Array;
use arrow::datatypes::DataType;
use rand::Rng;
/// Names accepted for the `distribution` parameter; used as the
/// `string_option` constraint in `Jitter::default_params`.
const DISTRIBUTION_VALUES: &[&str] = &["uniform", "normal", "density", "intensity"];
/// Which side of the anchor position a jitter offset is allowed to land on.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JitterSide {
    /// Offsets keep whatever sign they were sampled with.
    Both,
    /// Offsets are folded onto the non-negative side.
    Positive,
    /// Offsets are folded onto the non-positive side.
    Negative,
}

impl JitterSide {
    /// Maps a `side` parameter string to a variant.
    ///
    /// `"right"`/`"top"` select [`JitterSide::Positive`], `"left"`/`"bottom"`
    /// select [`JitterSide::Negative`]; every other string (matching is
    /// case-sensitive) falls back to [`JitterSide::Both`].
    fn from_str(s: &str) -> Self {
        if matches!(s, "right" | "top") {
            Self::Positive
        } else if matches!(s, "left" | "bottom") {
            Self::Negative
        } else {
            Self::Both
        }
    }

    /// Reflects a raw sampled offset onto the permitted side.
    ///
    /// `Both` returns `raw` untouched; the one-sided variants return
    /// `|raw|` or `-|raw|` respectively.
    fn fold(self, raw: f64) -> f64 {
        let magnitude = raw.abs();
        match self {
            Self::Both => raw,
            Self::Positive => magnitude,
            Self::Negative => -magnitude,
        }
    }
}
/// Sampling scheme used to draw raw jitter offsets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JitterDistribution {
    /// Uniform draw centred on zero.
    Uniform,
    /// Gaussian draw centred on zero.
    Normal,
    /// Sampled like `Uniform` by `sample`.
    Density,
    /// Sampled like `Uniform` by `sample`.
    Intensity,
}

impl JitterDistribution {
    /// Parses a distribution name, ignoring case.
    ///
    /// `"gaussian"` is accepted as an alias for `"normal"`; any name that is
    /// not recognised yields [`JitterDistribution::Uniform`].
    fn from_str(s: &str) -> Self {
        let lowered = s.to_lowercase();
        if lowered == "normal" || lowered == "gaussian" {
            Self::Normal
        } else if lowered == "density" {
            Self::Density
        } else if lowered == "intensity" {
            Self::Intensity
        } else {
            Self::Uniform
        }
    }

    /// Draws one raw offset for a jitter band of the given `width`.
    ///
    /// Non-normal variants return a uniform value in `[-width / 2, width / 2)`.
    /// `Normal` uses a Box-Muller transform with a standard deviation of
    /// `width / 4`, so its output is not bounded by `width`.
    fn sample<R: Rng>(&self, rng: &mut R, width: f64) -> f64 {
        match self {
            Self::Normal => {
                // Both uniforms are drawn before any adjustment so the RNG is
                // always advanced by exactly two draws.
                let first: f64 = rng.gen();
                let second: f64 = rng.gen();
                // ln(0) is -inf; nudge an exact zero to the smallest positive value.
                let safe_first = if first == 0.0 {
                    f64::MIN_POSITIVE
                } else {
                    first
                };
                let radius = (-2.0 * safe_first.ln()).sqrt();
                let angle = 2.0 * std::f64::consts::PI * second;
                radius * angle.cos() * (width / 4.0)
            }
            Self::Uniform | Self::Density | Self::Intensity => (rng.gen::<f64>() - 0.5) * width,
        }
    }
}
/// Estimates a kernel bandwidth with Silverman's rule of thumb.
///
/// The spread estimate is the smaller of the (population) standard deviation
/// and `IQR / 1.34`; when the IQR is zero the standard deviation is used on
/// its own. The result is `0.9 * adjust * spread * n^(-1/5)`.
///
/// Returns `1.0` as a fallback when there are fewer than two values or when
/// the spread estimate is exactly zero (all values identical).
fn silverman_bandwidth(values: &[f64], adjust: f64) -> f64 {
    let n = values.len() as f64;
    if n <= 1.0 {
        return 1.0;
    }

    let mean = values.iter().sum::<f64>() / n;
    let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
    let std_dev = variance.sqrt();

    // `total_cmp` is a genuine total order even when NaN is present. The
    // previous `partial_cmp(..).unwrap_or(Equal)` comparator was not, and
    // the standard sort is allowed to panic on such comparators.
    let mut sorted = values.to_vec();
    sorted.sort_unstable_by(f64::total_cmp);

    let q1 = quantile_cont(&sorted, 0.25);
    let q3 = quantile_cont(&sorted, 0.75);
    let iqr = q3 - q1;

    let scale = if iqr > 0.0 {
        std_dev.min(iqr / 1.34)
    } else {
        std_dev
    };
    if scale == 0.0 {
        return 1.0;
    }

    0.9 * adjust * scale * n.powf(-0.2)
}
/// Linearly interpolated quantile of an ascending-sorted slice.
///
/// `p` is the quantile fraction and is clamped to `[0, 1]`, so an
/// out-of-range fraction returns the first or last element instead of
/// indexing past the end of the slice. A NaN `p` resolves to the first
/// element.
///
/// Returns `0.0` for an empty slice and the sole element for a slice of
/// length one.
fn quantile_cont(sorted: &[f64], p: f64) -> f64 {
    match sorted.len() {
        0 => return 0.0,
        1 => return sorted[0],
        _ => {}
    }

    let last = sorted.len() - 1;
    // Fractional position of the quantile within the index range [0, last].
    let idx = p.clamp(0.0, 1.0) * last as f64;
    let lo = idx.floor() as usize;
    let hi = (idx.ceil() as usize).min(last);

    if lo == hi {
        sorted[lo]
    } else {
        let frac = idx - lo as f64;
        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
    }
}
/// Gaussian kernel density evaluated at each input value.
///
/// This is `compute_intensities` divided element-wise by the number of
/// values.
fn compute_densities(values: &[f64], bandwidth: f64) -> Vec<f64> {
    let count = values.len() as f64;
    let mut densities = compute_intensities(values, bandwidth);
    for density in densities.iter_mut() {
        *density /= count;
    }
    densities
}
/// Un-normalised Gaussian kernel sum evaluated at each input value.
///
/// For every value the Gaussian kernel contributions of all values
/// (including itself) are summed and scaled by `1 / (bandwidth * sqrt(2π))`.
/// The sum is not divided by the number of values.
fn compute_intensities(values: &[f64], bandwidth: f64) -> Vec<f64> {
    let norm_factor = 1.0 / (bandwidth * (2.0 * std::f64::consts::PI).sqrt());
    let gaussian = |u: f64| (-0.5 * u * u).exp();

    let mut intensities = Vec::with_capacity(values.len());
    for &center in values {
        let kernel_sum: f64 = values
            .iter()
            .map(|&other| gaussian((center - other) / bandwidth))
            .sum();
        intensities.push(kernel_sum * norm_factor);
    }
    intensities
}
/// Computes a per-row scale factor from within-group kernel estimates.
///
/// Rows are bucketed by `group_indices`; each non-empty group gets its own
/// bandwidth (`explicit_bandwidth * adjust` when given, otherwise Silverman's
/// rule) and its own intensity or density estimate. The raw values of all
/// groups are then divided by the single largest raw value across groups.
/// If that maximum is not positive, every row gets a scale of `1.0`.
///
/// Panics if a group index is not smaller than `n_groups`.
fn compute_grouped_scales(
    values: &[f64],
    group_indices: &[usize],
    n_groups: usize,
    explicit_bandwidth: Option<f64>,
    adjust: f64,
    use_intensity: bool,
) -> Vec<f64> {
    // One (row positions, values) bucket per group, in row order.
    let mut buckets: Vec<(Vec<usize>, Vec<f64>)> = vec![(Vec::new(), Vec::new()); n_groups];
    for (row, (&value, &group)) in values.iter().zip(group_indices).enumerate() {
        let (rows, members) = &mut buckets[group];
        rows.push(row);
        members.push(value);
    }

    let mut raw_by_row = vec![0.0; values.len()];
    for (rows, members) in &buckets {
        if members.is_empty() {
            continue;
        }
        let bandwidth = match explicit_bandwidth {
            Some(bw) => bw * adjust,
            None => silverman_bandwidth(members, adjust),
        };
        let raw = if use_intensity {
            compute_intensities(members, bandwidth)
        } else {
            compute_densities(members, bandwidth)
        };
        // Scatter the group's estimates back to their original row positions.
        for (&row, &estimate) in rows.iter().zip(raw.iter()) {
            raw_by_row[row] = estimate;
        }
    }

    let global_max = raw_by_row.iter().fold(0.0_f64, |acc, &v| acc.max(v));
    if global_max > 0.0 {
        raw_by_row.iter().map(|v| v / global_max).collect()
    } else {
        vec![1.0; values.len()]
    }
}
/// Marker type for the jitter position adjustment.
#[derive(Debug, Clone, Copy)]
pub struct Jitter;

impl std::fmt::Display for Jitter {
    /// Writes the literal name `jitter`; formatter padding is not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("jitter")
    }
}
impl PositionTrait for Jitter {
    /// Identifies this adjustment as `PositionType::Jitter`.
    fn position_type(&self) -> PositionType {
        PositionType::Jitter
    }

    /// Parameter table for the jitter adjustment, in declaration order:
    /// `width`, `dodge`, `distribution`, `bandwidth`, `adjust`, `side`.
    fn default_params(&self) -> &'static [ParamDefinition] {
        const JITTER_PARAMS: &[ParamDefinition] = &[
            // Band width for the jitter; defaults to 0.9, range 0.0..1.0.
            ParamDefinition {
                name: "width",
                default: DefaultParamValue::Number(0.9),
                constraint: ParamConstraint::number_range(0.0, 1.0),
            },
            // Whether groups are dodged before jittering.
            ParamDefinition {
                name: "dodge",
                default: DefaultParamValue::Boolean(true),
                constraint: ParamConstraint::boolean(),
            },
            // Sampling scheme; one of `DISTRIBUTION_VALUES`.
            ParamDefinition {
                name: "distribution",
                default: DefaultParamValue::String("uniform"),
                constraint: ParamConstraint::string_option(DISTRIBUTION_VALUES),
            },
            // Explicit kernel bandwidth; unset by default.
            ParamDefinition {
                name: "bandwidth",
                default: DefaultParamValue::Null,
                constraint: ParamConstraint::number_min_exclusive(0.0),
            },
            // Multiplier applied to the kernel bandwidth.
            ParamDefinition {
                name: "adjust",
                default: DefaultParamValue::Number(1.0),
                constraint: ParamConstraint::number_min_exclusive(0.0),
            },
            // Side the offsets may fall on; one of `SIDE_VALUES`.
            ParamDefinition {
                name: "side",
                default: DefaultParamValue::String("both"),
                constraint: ParamConstraint::string_option(SIDE_VALUES),
            },
        ];
        JITTER_PARAMS
    }

    /// Always `true`.
    fn creates_pos1offset(&self) -> bool {
        true
    }

    /// Always `true`.
    fn creates_pos2offset(&self) -> bool {
        true
    }

    /// Runs `apply_jitter` on the frame; the second tuple element is always `None`.
    fn apply_adjustment(
        &self,
        df: DataFrame,
        layer: &Layer,
        spec: &Plot,
    ) -> Result<(DataFrame, Option<f64>)> {
        apply_jitter(df, layer, spec).map(|adjusted| (adjusted, None))
    }
}
/// Builds per-row scale factors for the density/intensity jitter modes.
///
/// The continuous axis (`pos1` when `pos1_continuous`, otherwise `pos2`) is
/// read as `f64` and kernel estimates are computed within groups defined by
/// the discrete axis column plus selected `partition_by` columns: all of them
/// when `dodge` is set, otherwise only those whose name contains `"_facet"`.
///
/// Always returns `Ok(Some(..))` on success.
///
/// # Errors
/// `GgsqlError::InternalError` when the continuous column is missing or
/// cannot be read as `Float64`; errors from `compute_group_indices` are
/// propagated.
fn compute_density_scales(
    df: &DataFrame,
    layer: &Layer,
    pos1_continuous: bool,
    use_intensity: bool,
    dodge: bool,
    explicit_bandwidth: Option<f64>,
    adjust: f64,
) -> Result<Option<Vec<f64>>> {
    let (continuous_col, discrete_col) = if pos1_continuous {
        ("pos1", "pos2")
    } else {
        ("pos2", "pos1")
    };
    let continuous_col_name = naming::aesthetic_column(continuous_col);
    let discrete_col_name = naming::aesthetic_column(discrete_col);

    // Shared error for both the cast and the downcast step.
    let not_numeric = || {
        GgsqlError::InternalError(format!(
            "{} must be numeric for density jitter",
            continuous_col
        ))
    };

    let column = df.column(&continuous_col_name).map_err(|_| {
        GgsqlError::InternalError(format!(
            "Missing {} column for density jitter",
            continuous_col
        ))
    })?;
    let casted = cast_array(column, &DataType::Float64).map_err(|_| not_numeric())?;
    let f64_arr = as_f64(&casted).map_err(|_| not_numeric())?;

    // NOTE(review): null entries are replaced by 0.0 and therefore take part
    // in the kernel estimate — confirm this is intended.
    let mut values: Vec<f64> = Vec::with_capacity(f64_arr.len());
    for row in 0..f64_arr.len() {
        values.push(if f64_arr.is_null(row) {
            0.0
        } else {
            f64_arr.value(row)
        });
    }

    let mut density_group_cols = vec![discrete_col_name];
    for partition_col in &layer.partition_by {
        let already_listed = density_group_cols.contains(partition_col);
        let wanted = dodge || partition_col.contains("_facet");
        if !already_listed && wanted {
            density_group_cols.push(partition_col.clone());
        }
    }

    let scales = match compute_group_indices(df, &density_group_cols)? {
        Some(info) => compute_grouped_scales(
            &values,
            &info.indices,
            info.n_groups,
            explicit_bandwidth,
            adjust,
            use_intensity,
        ),
        None => {
            // No grouping information: treat every row as one group, which
            // yields the same bandwidth, estimate and max-normalisation.
            let single_group = vec![0_usize; values.len()];
            compute_grouped_scales(
                &values,
                &single_group,
                1,
                explicit_bandwidth,
                adjust,
                use_intensity,
            )
        }
    };
    Ok(Some(scales))
}
fn apply_jitter(df: DataFrame, layer: &Layer, spec: &Plot) -> Result<DataFrame> {
let jitter_pos1 = is_continuous_scale(spec, "pos1") == Some(false);
let jitter_pos2 = is_continuous_scale(spec, "pos2") == Some(false);
let width = layer
.parameters
.get("width")
.and_then(|v| match v {
ParameterValue::Number(n) => Some(*n),
_ => None,
})
.unwrap_or(0.9);
let dodge = layer
.parameters
.get("dodge")
.and_then(|v| match v {
ParameterValue::Boolean(b) => Some(*b),
_ => None,
})
.unwrap_or(true);
let distribution = layer
.parameters
.get("distribution")
.and_then(|v| match v {
ParameterValue::String(s) => Some(JitterDistribution::from_str(s)),
_ => None,
})
.unwrap_or(JitterDistribution::Uniform);
let side = layer
.parameters
.get("side")
.and_then(|v| match v {
ParameterValue::String(s) => Some(JitterSide::from_str(s.as_str())),
_ => None,
})
.unwrap_or(JitterSide::Both);
let pos1_continuous = !jitter_pos1;
let pos2_continuous = !jitter_pos2;
let use_density_scaling = distribution == JitterDistribution::Density
|| distribution == JitterDistribution::Intensity;
if use_density_scaling && (pos1_continuous == pos2_continuous) {
let dist_name = if distribution == JitterDistribution::Intensity {
"intensity"
} else {
"density"
};
return Err(GgsqlError::ValidationError(format!(
"Jitter distribution '{}' requires exactly one continuous axis",
dist_name
)));
}
let mut rng = rand::thread_rng();
let n_rows = df.height();
let group_cols = non_facet_partition_cols(&layer.partition_by, spec);
let group_info = if dodge {
compute_group_indices(&df, &group_cols)?
} else {
None
};
let (n_groups, group_indices) = match &group_info {
Some(info) if info.n_groups > 1 => (info.n_groups, Some(&info.indices)),
_ => (1, None),
};
let explicit_bandwidth = layer.parameters.get("bandwidth").and_then(|v| match v {
ParameterValue::Number(n) => Some(*n),
_ => None,
});
let adjust = layer
.parameters
.get("adjust")
.and_then(|v| match v {
ParameterValue::Number(n) => Some(*n),
_ => None,
})
.unwrap_or(1.0);
let use_intensity = distribution == JitterDistribution::Intensity;
let density_scales = if use_density_scaling {
compute_density_scales(
&df,
layer,
pos1_continuous,
use_intensity,
dodge,
explicit_bandwidth,
adjust,
)?
} else {
None
};
let pos1offset_col = naming::aesthetic_column("pos1offset");
let pos2offset_col = naming::aesthetic_column("pos2offset");
let mut result = df;
let dodge_offsets = if n_groups > 1 {
let indices = group_indices.unwrap();
Some(compute_dodge_offsets(
indices,
n_groups,
width,
jitter_pos1,
jitter_pos2,
))
} else {
None
};
let make_jitter =
|rng: &mut rand::rngs::ThreadRng, jitter_width: f64, count: usize| -> Vec<f64> {
(0..count)
.map(|i| {
let raw = distribution.sample(rng, jitter_width);
let folded = side.fold(raw);
if let Some(ref scales) = density_scales {
folded * scales[i]
} else {
folded
}
})
.collect()
};
if jitter_pos1 {
let jitter_width = dodge_offsets
.as_ref()
.map(|d| d.adjusted_width)
.unwrap_or(width);
let jitters = make_jitter(&mut rng, jitter_width, n_rows);
let offsets: Vec<f64> = if let Some(ref dodge) = dodge_offsets {
if let Some(ref centers) = dodge.pos1 {
centers
.iter()
.zip(jitters.iter())
.map(|(c, j)| c + j)
.collect()
} else {
jitters
}
} else {
jitters
};
result = result.with_column(&pos1offset_col, new_f64_array_non_null(offsets))?;
}
if jitter_pos2 {
let jitter_width = dodge_offsets
.as_ref()
.map(|d| d.adjusted_width)
.unwrap_or(width);
let jitters = make_jitter(&mut rng, jitter_width, n_rows);
let offsets: Vec<f64> = if let Some(ref dodge) = dodge_offsets {
if let Some(ref centers) = dodge.pos2 {
centers
.iter()
.zip(jitters.iter())
.map(|(c, j)| c + j)
.collect()
} else {
jitters
}
} else {
jitters
};
result = result.with_column(&pos2offset_col, new_f64_array_non_null(offsets))?;
}
Ok(result)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::array_util::{as_f64, as_str, value_to_string};
use crate::df;
use crate::plot::layer::Geom;
use crate::plot::{AestheticValue, Mappings, Scale, ScaleType};
/// Four-row fixture: discrete `pos1` (A/B), numeric `pos2`, zero `pos2end`
/// and a two-level `fill` column (X/Y).
fn make_test_df() -> DataFrame {
    let frame = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "B", "B"],
        "__ggsql_aes_pos2__" => vec![10.0, 20.0, 15.0, 25.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0],
        "__ggsql_aes_fill__" => vec!["X", "Y", "X", "Y"],
    };
    frame.unwrap()
}
/// Bar layer fixture mapping `pos1`, `pos2`, `pos2end` and `fill`, partitioned
/// by the fill column.
fn make_test_layer() -> Layer {
    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );
    mappings.insert(
        "fill",
        AestheticValue::standard_column("__ggsql_aes_fill__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = vec![String::from("__ggsql_aes_fill__")];
    layer
}
/// Scale for `aesthetic` whose type is set to continuous.
fn make_continuous_scale(aesthetic: &str) -> Scale {
    let scale_type = Some(ScaleType::continuous());
    let mut scale = Scale::new(aesthetic);
    scale.scale_type = scale_type;
    scale
}
/// Scale for `aesthetic` whose type is set to discrete.
fn make_discrete_scale(aesthetic: &str) -> Scale {
    let scale_type = Some(ScaleType::discrete());
    let mut scale = Scale::new(aesthetic);
    scale.scale_type = scale_type;
    scale
}
// Discrete pos1 + continuous pos2: only pos1offset is created, offsets stay
// within half the default width, and no adjusted width is reported.
#[test]
fn test_jitter_horizontal_only_with_dodge() {
    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, width) = Jitter
        .apply_adjustment(make_test_df(), &make_test_layer(), &spec)
        .unwrap();

    let pos1offset = result.column("__ggsql_aes_pos1offset__");
    assert!(pos1offset.is_ok(), "pos1offset column should be created");
    assert!(
        result.column("__ggsql_aes_pos2offset__").is_err(),
        "pos2offset column should NOT be created when pos2 is continuous"
    );

    let offset = as_f64(pos1offset.unwrap()).unwrap();
    for row in 0..offset.len() {
        let v = offset.value(row);
        assert!(
            (-0.45..=0.45).contains(&v),
            "Jitter+dodge offset {} should be in range [-0.45, 0.45]",
            v
        );
    }
    assert!(width.is_none());
}
// With dodge disabled, pos1 offsets stay within half the default width.
#[test]
fn test_jitter_horizontal_no_dodge() {
    let mut layer = make_test_layer();
    layer
        .parameters
        .insert(String::from("dodge"), ParameterValue::Boolean(false));

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &layer, &spec)
        .unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    for row in 0..offset.len() {
        let v = offset.value(row);
        assert!(
            (-0.45..=0.45).contains(&v),
            "Pure jitter offset {} should be in range [-0.45, 0.45]",
            v
        );
    }
}
// Continuous pos1 + discrete pos2: only pos2offset is created and its values
// stay within half the default width.
#[test]
fn test_jitter_vertical_only() {
    let mut spec = Plot::new();
    spec.scales.push(make_continuous_scale("pos1"));
    spec.scales.push(make_discrete_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &make_test_layer(), &spec)
        .unwrap();

    assert!(
        result.column("__ggsql_aes_pos1offset__").is_err(),
        "pos1offset column should NOT be created when pos1 is continuous"
    );
    let pos2offset = result.column("__ggsql_aes_pos2offset__");
    assert!(pos2offset.is_ok(), "pos2offset column should be created");

    let offset = as_f64(pos2offset.unwrap()).unwrap();
    for row in 0..offset.len() {
        let v = offset.value(row);
        assert!(
            (-0.45..=0.45).contains(&v),
            "Jitter+dodge offset {} should be in range [-0.45, 0.45]",
            v
        );
    }
}
// Both axes discrete: both offset columns are created.
#[test]
fn test_jitter_bidirectional() {
    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_discrete_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &make_test_layer(), &spec)
        .unwrap();

    let pos1offset = result.column("__ggsql_aes_pos1offset__");
    assert!(pos1offset.is_ok(), "pos1offset column should be created");
    let pos2offset = result.column("__ggsql_aes_pos2offset__");
    assert!(pos2offset.is_ok(), "pos2offset column should be created");
}
// Both axes continuous: no offset column is created.
#[test]
fn test_jitter_neither_discrete() {
    let mut spec = Plot::new();
    spec.scales.push(make_continuous_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &make_test_layer(), &spec)
        .unwrap();

    let pos1offset = result.column("__ggsql_aes_pos1offset__");
    assert!(
        pos1offset.is_err(),
        "pos1offset column should NOT be created when pos1 is continuous"
    );
    let pos2offset = result.column("__ggsql_aes_pos2offset__");
    assert!(
        pos2offset.is_err(),
        "pos2offset column should NOT be created when pos2 is continuous"
    );
}
// A custom width of 0.6 bounds the combined dodge + jitter offsets to ±0.3.
#[test]
fn test_jitter_custom_width_with_dodge() {
    let mut layer = make_test_layer();
    layer
        .parameters
        .insert(String::from("width"), ParameterValue::Number(0.6));

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &layer, &spec)
        .unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    for row in 0..offset.len() {
        let v = offset.value(row);
        assert!(
            (-0.3..=0.3).contains(&v),
            "Jitter+dodge offset {} should be in range [-0.3, 0.3] with width 0.6",
            v
        );
    }
}
// With dodge on, the mean offset of fill group X lies below that of group Y.
#[test]
fn test_jitter_groups_separate_with_dodge() {
    fn mean(xs: &[f64]) -> f64 {
        xs.iter().sum::<f64>() / xs.len() as f64
    }

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &make_test_layer(), &spec)
        .unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let fill_arr = result.column("__ggsql_aes_fill__").unwrap();

    // Split offsets by fill value.
    let mut group_x_offsets: Vec<f64> = Vec::new();
    let mut group_y_offsets: Vec<f64> = Vec::new();
    for row in 0..result.height() {
        let bucket = if value_to_string(fill_arr, row).contains('X') {
            &mut group_x_offsets
        } else {
            &mut group_y_offsets
        };
        bucket.push(offset.value(row));
    }

    let x_mean = mean(&group_x_offsets);
    let y_mean = mean(&group_y_offsets);
    assert!(
        x_mean < y_mean,
        "Group X mean ({}) should be less than Group Y mean ({})",
        x_mean,
        y_mean
    );
}
// Without partition columns there is nothing to dodge; offsets stay within
// half the default width.
#[test]
fn test_jitter_no_groups_no_dodge() {
    let mut layer = make_test_layer();
    layer.partition_by = Vec::new();

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &layer, &spec)
        .unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    for row in 0..offset.len() {
        let v = offset.value(row);
        assert!(
            (-0.45..=0.45).contains(&v),
            "Pure jitter offset {} should be in range [-0.45, 0.45]",
            v
        );
    }
}
// Jitter reports that it can create a pos1offset column.
#[test]
fn test_jitter_creates_pos1offset() {
assert!(Jitter.creates_pos1offset());
}
// Jitter reports that it can create a pos2offset column.
#[test]
fn test_jitter_creates_pos2offset() {
assert!(Jitter.creates_pos2offset());
}
// Pins the parameter table: six entries, in order, with their default values.
#[test]
fn test_jitter_default_params() {
let jitter = Jitter;
let params = jitter.default_params();
assert_eq!(params.len(), 6);
// width: numeric, default 0.9
assert_eq!(params[0].name, "width");
assert!(matches!(params[0].default, DefaultParamValue::Number(0.9)));
// dodge: boolean, default true
assert_eq!(params[1].name, "dodge");
assert!(matches!(
params[1].default,
DefaultParamValue::Boolean(true)
));
// distribution: string, default "uniform"
assert_eq!(params[2].name, "distribution");
assert!(matches!(
params[2].default,
DefaultParamValue::String("uniform")
));
// bandwidth: no default
assert_eq!(params[3].name, "bandwidth");
assert!(matches!(params[3].default, DefaultParamValue::Null));
// adjust: numeric, default 1.0
assert_eq!(params[4].name, "adjust");
assert!(matches!(params[4].default, DefaultParamValue::Number(1.0)));
// side: string, default "both"
assert_eq!(params[5].name, "side");
assert!(matches!(
params[5].default,
DefaultParamValue::String("both")
));
}
// With the normal distribution the mean offset stays loosely near zero.
// The draws are random, so the bound is deliberately wide.
#[test]
fn test_jitter_normal_distribution() {
    let mut layer = make_test_layer();
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("normal")),
    );

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter
        .apply_adjustment(make_test_df(), &layer, &spec)
        .unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let total: f64 = (0..offset.len()).map(|row| offset.value(row)).sum();
    let mean = total / offset.len() as f64;
    assert!(
        mean.abs() < 0.5,
        "Normal distribution mean {} should be close to 0",
        mean
    );
}
// Name parsing: case-insensitive, "gaussian" aliases normal, unknown names
// fall back to uniform.
#[test]
fn test_jitter_distribution_from_str() {
    let cases = [
        ("uniform", JitterDistribution::Uniform),
        ("normal", JitterDistribution::Normal),
        ("gaussian", JitterDistribution::Normal),
        ("density", JitterDistribution::Density),
        ("DENSITY", JitterDistribution::Density),
        ("NORMAL", JitterDistribution::Normal),
        ("intensity", JitterDistribution::Intensity),
        ("INTENSITY", JitterDistribution::Intensity),
        ("unknown", JitterDistribution::Uniform),
    ];
    for (name, expected) in cases {
        assert_eq!(JitterDistribution::from_str(name), expected);
    }
}
// Density jitter is rejected when both axes are discrete or both continuous,
// and accepted with one of each.
#[test]
fn test_jitter_density_requires_one_continuous_axis() {
    fn spec_with(pos1: Scale, pos2: Scale) -> Plot {
        let mut spec = Plot::new();
        spec.scales.push(pos1);
        spec.scales.push(pos2);
        spec
    }

    let df = make_test_df();
    let mut layer = make_test_layer();
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("density")),
    );

    // Rejected: discrete/discrete, then continuous/continuous.
    let rejected = [
        spec_with(make_discrete_scale("pos1"), make_discrete_scale("pos2")),
        spec_with(make_continuous_scale("pos1"), make_continuous_scale("pos2")),
    ];
    for spec in &rejected {
        let result = Jitter.apply_adjustment(df.clone(), &layer, spec);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("requires exactly one continuous axis"));
    }

    // Accepted: discrete pos1 with continuous pos2.
    let spec = spec_with(make_discrete_scale("pos1"), make_continuous_scale("pos2"));
    let result = Jitter.apply_adjustment(df, &layer, &spec);
    assert!(result.is_ok());
}
// Smoke test: density jitter on a single ungrouped category yields one
// offset per row.
#[test]
fn test_jitter_density_distribution() {
    let df = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A", "A", "A"],
        "__ggsql_aes_pos2__" => vec![1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    }
    .unwrap();

    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("density")),
    );

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter.apply_adjustment(df, &layer, &spec).unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let mut offsets: Vec<f64> = Vec::with_capacity(offset.len());
    for row in 0..offset.len() {
        offsets.push(offset.value(row));
    }
    assert_eq!(offsets.len(), 7);
}
// Density jitter with a fill partition: every row gets an offset and group X
// is dodged below group Y on average.
#[test]
fn test_jitter_density_per_group() {
    fn mean(xs: &[f64]) -> f64 {
        xs.iter().sum::<f64>() / xs.len() as f64
    }

    let df = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A", "A"],
        "__ggsql_aes_pos2__" => vec![1.0, 1.0, 1.0, 3.0, 3.0, 3.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        "__ggsql_aes_fill__" => vec!["X", "X", "X", "Y", "Y", "Y"],
    }
    .unwrap();

    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );
    mappings.insert(
        "fill",
        AestheticValue::standard_column("__ggsql_aes_fill__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = vec![String::from("__ggsql_aes_fill__")];
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("density")),
    );

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter.apply_adjustment(df, &layer, &spec).unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let mut offsets: Vec<f64> = Vec::with_capacity(offset.len());
    for row in 0..offset.len() {
        offsets.push(offset.value(row));
    }
    assert_eq!(offsets.len(), 6);

    // Split offsets by fill value.
    let fill_str = as_str(result.column("__ggsql_aes_fill__").unwrap()).unwrap();
    let mut group_x_offsets: Vec<f64> = Vec::new();
    let mut group_y_offsets: Vec<f64> = Vec::new();
    for row in 0..result.height() {
        let bucket = if fill_str.value(row).contains('X') {
            &mut group_x_offsets
        } else {
            &mut group_y_offsets
        };
        bucket.push(offset.value(row));
    }

    let x_mean = mean(&group_x_offsets);
    let y_mean = mean(&group_y_offsets);
    assert!(
        x_mean < y_mean,
        "Group X mean ({}) should be less than Group Y mean ({})",
        x_mean,
        y_mean
    );
}
// Covers the Silverman bandwidth helper: positive for spread data, the 1.0
// fallback for constant and single-value input, and linear scaling in `adjust`.
#[test]
fn test_silverman_bandwidth() {
let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let bandwidth = super::silverman_bandwidth(&values, 1.0);
assert!(bandwidth > 0.0);
// Zero spread falls back to 1.0.
let constant = vec![5.0, 5.0, 5.0, 5.0, 5.0];
let bandwidth = super::silverman_bandwidth(&constant, 1.0);
assert_eq!(bandwidth, 1.0);
// A single value also falls back to 1.0.
let single = vec![5.0];
let bandwidth = super::silverman_bandwidth(&single, 1.0);
assert_eq!(bandwidth, 1.0);
// Doubling `adjust` doubles the bandwidth.
let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let bw_default = super::silverman_bandwidth(&values, 1.0);
let bw_double = super::silverman_bandwidth(&values, 2.0);
assert!(
(bw_double - bw_default * 2.0).abs() < 1e-10,
"Bandwidth with adjust=2.0 should be twice the default"
);
}
// Points in the cluster at 0.0 must score a higher density than the isolated
// point at 10.0.
#[test]
fn test_compute_densities() {
let values = vec![0.0, 0.0, 0.0, 5.0, 10.0];
let bandwidth = 1.0;
let densities = super::compute_densities(&values, bandwidth);
assert!(densities[0] > densities[4]);
assert!(densities[1] > densities[4]);
assert!(densities[2] > densities[4]);
}
// Intensity must equal density multiplied by the number of values.
#[test]
fn test_compute_intensities() {
    let values = vec![1.0, 1.0, 1.0, 5.0, 10.0];
    let bandwidth = 1.0;
    let n = values.len() as f64;

    let densities = super::compute_densities(&values, bandwidth);
    let intensities = super::compute_intensities(&values, bandwidth);

    let paired = densities.len().min(intensities.len());
    for idx in 0..paired {
        let d = densities[idx];
        let i = intensities[idx];
        assert!(
            (i - d * n).abs() < 1e-10,
            "Intensity {} should be {} times density {}",
            i,
            n,
            d
        );
    }
}
// Intensity jitter is rejected when both axes are discrete and accepted with
// one discrete and one continuous axis.
#[test]
fn test_jitter_intensity_requires_one_continuous_axis() {
    fn spec_with(pos1: Scale, pos2: Scale) -> Plot {
        let mut spec = Plot::new();
        spec.scales.push(pos1);
        spec.scales.push(pos2);
        spec
    }

    let df = make_test_df();
    let mut layer = make_test_layer();
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("intensity")),
    );

    let spec = spec_with(make_discrete_scale("pos1"), make_discrete_scale("pos2"));
    let result = Jitter.apply_adjustment(df.clone(), &layer, &spec);
    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("requires exactly one continuous axis"));

    let spec = spec_with(make_discrete_scale("pos1"), make_continuous_scale("pos2"));
    let result = Jitter.apply_adjustment(df, &layer, &spec);
    assert!(result.is_ok());
}
// Smoke test: intensity jitter on a single ungrouped category yields one
// offset per row.
#[test]
fn test_jitter_intensity_distribution() {
    let df = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A", "A", "A"],
        "__ggsql_aes_pos2__" => vec![1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    }
    .unwrap();

    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("intensity")),
    );

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter.apply_adjustment(df, &layer, &spec).unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let mut offsets: Vec<f64> = Vec::with_capacity(offset.len());
    for row in 0..offset.len() {
        offsets.push(offset.value(row));
    }
    assert_eq!(offsets.len(), 7);
}
// Smoke test: intensity jitter over two categories with dodge disabled
// yields one offset per row. Only the output length is checked.
#[test]
fn test_jitter_intensity_global_normalization() {
    let df = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A", "B", "B"],
        "__ggsql_aes_pos2__" => vec![1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    }
    .unwrap();

    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("intensity")),
    );
    layer
        .parameters
        .insert(String::from("dodge"), ParameterValue::Boolean(false));

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let (result, _) = Jitter.apply_adjustment(df, &layer, &spec).unwrap();

    let offset = as_f64(result.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let mut offsets: Vec<f64> = Vec::with_capacity(offset.len());
    for row in 0..offset.len() {
        offsets.push(offset.value(row));
    }
    assert_eq!(offsets.len(), 7);
}
// Density jitter succeeds when an explicit bandwidth is supplied.
#[test]
fn test_jitter_density_explicit_bandwidth() {
    let df = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A"],
        "__ggsql_aes_pos2__" => vec![1.0, 1.0, 1.0, 2.0, 3.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0],
    }
    .unwrap();

    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("density")),
    );
    layer
        .parameters
        .insert(String::from("bandwidth"), ParameterValue::Number(0.5));

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let result = Jitter.apply_adjustment(df, &layer, &spec);
    assert!(result.is_ok(), "Should succeed with explicit bandwidth");
}
// Density jitter succeeds when the `adjust` multiplier is supplied.
#[test]
fn test_jitter_density_adjust_parameter() {
    let df = df! {
        "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A"],
        "__ggsql_aes_pos2__" => vec![1.0, 1.0, 1.0, 2.0, 3.0],
        "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0],
    }
    .unwrap();

    let mut mappings = Mappings::new();
    mappings.insert(
        "pos1",
        AestheticValue::standard_column("__ggsql_aes_pos1__"),
    );
    mappings.insert(
        "pos2",
        AestheticValue::standard_column("__ggsql_aes_pos2__"),
    );
    mappings.insert(
        "pos2end",
        AestheticValue::standard_column("__ggsql_aes_pos2end__"),
    );

    let mut layer = Layer::new(Geom::bar());
    layer.mappings = mappings;
    layer.partition_by = Vec::new();
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("density")),
    );
    layer
        .parameters
        .insert(String::from("adjust"), ParameterValue::Number(2.0));

    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let result = Jitter.apply_adjustment(df, &layer, &spec);
    assert!(result.is_ok(), "Should succeed with adjust parameter");
}
/// `quantile_cont` on the sorted sample 1..=5 must hit the exact sample
/// values at the 0, 0.25, 0.5, 0.75 and 1 quantiles (tolerance 1e-10).
#[test]
fn test_quantile_cont() {
    let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];

    // Extremes map to the first and last elements.
    assert!((super::quantile_cont(&sorted, 0.0) - 1.0).abs() < 1e-10);
    assert!((super::quantile_cont(&sorted, 1.0) - 5.0).abs() < 1e-10);

    // Median.
    assert!((super::quantile_cont(&sorted, 0.5) - 3.0).abs() < 1e-10);

    // Quartiles.
    assert!((super::quantile_cont(&sorted, 0.25) - 2.0).abs() < 1e-10);
    assert!((super::quantile_cont(&sorted, 0.75) - 4.0).abs() < 1e-10);
}
/// Checks both halves of `JitterSide`: string parsing via `from_str` and
/// sign folding via `fold`.
#[test]
fn test_jitter_side_value_mapping() {
    // "right"/"top" parse as Positive, "left"/"bottom" as Negative, and
    // everything else (including "both") as Both.
    let parse_cases = [
        ("right", JitterSide::Positive),
        ("top", JitterSide::Positive),
        ("left", JitterSide::Negative),
        ("bottom", JitterSide::Negative),
        ("both", JitterSide::Both),
        ("anything-else", JitterSide::Both),
    ];
    for (label, expected) in parse_cases {
        assert_eq!(JitterSide::from_str(label), expected);
    }

    // Positive forces a non-negative result, Negative a non-positive one,
    // and Both leaves the raw value untouched.
    let fold_cases = [
        (JitterSide::Positive, 0.3, 0.3),
        (JitterSide::Positive, -0.3, 0.3),
        (JitterSide::Negative, 0.3, -0.3),
        (JitterSide::Negative, -0.3, -0.3),
        (JitterSide::Both, 0.3, 0.3),
        (JitterSide::Both, -0.3, -0.3),
    ];
    for (side, raw, expected) in fold_cases {
        assert_eq!(side.fold(raw), expected);
    }
}
/// With a discrete pos1 scale, dodging disabled and no partitioning, every
/// `side` value must confine the pos1 offsets to a single sign, and no
/// offset may exceed 0.45 in magnitude.
#[test]
fn test_jitter_side_pos1_one_sided_no_dodge() {
    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    // (side value, whether offsets are expected to be non-negative)
    let cases = [
        ("right", true),
        ("top", true),
        ("left", false),
        ("bottom", false),
    ];

    for (side, expect_positive) in cases {
        let mut layer = make_test_layer();
        layer.partition_by = Vec::new();
        layer
            .parameters
            .insert(String::from("dodge"), ParameterValue::Boolean(false));
        layer
            .parameters
            .insert(String::from("side"), ParameterValue::String(side.to_string()));

        let (adjusted, _) = Jitter
            .apply_adjustment(make_test_df(), &layer, &spec)
            .unwrap();
        let offsets = as_f64(adjusted.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();

        for v in (0..offsets.len()).map(|row| offsets.value(row)) {
            // Sign check first, then the magnitude bound.
            if expect_positive {
                assert!(
                    v >= 0.0,
                    "side={} should produce non-negative pos1offset, got {}",
                    side,
                    v
                );
            } else {
                assert!(
                    v <= 0.0,
                    "side={} should produce non-positive pos1offset, got {}",
                    side,
                    v
                );
            }
            assert!(
                v.abs() <= 0.45 + 1e-9,
                "magnitude should stay within width/2"
            );
        }
    }
}
/// Mirror of the pos1 one-sided test with the axes swapped: pos1 is
/// continuous and pos2 discrete, so the sign constraint is checked on the
/// pos2 offset column instead.
#[test]
fn test_jitter_side_pos2_one_sided_no_dodge() {
    let mut spec = Plot::new();
    spec.scales.push(make_continuous_scale("pos1"));
    spec.scales.push(make_discrete_scale("pos2"));

    // (side value, whether offsets are expected to be non-negative)
    let cases = [
        ("top", true),
        ("right", true),
        ("bottom", false),
        ("left", false),
    ];

    for (side, expect_positive) in cases {
        let mut layer = make_test_layer();
        layer.partition_by = Vec::new();
        layer
            .parameters
            .insert(String::from("dodge"), ParameterValue::Boolean(false));
        layer
            .parameters
            .insert(String::from("side"), ParameterValue::String(side.to_string()));

        let (adjusted, _) = Jitter
            .apply_adjustment(make_test_df(), &layer, &spec)
            .unwrap();
        let offsets = as_f64(adjusted.column("__ggsql_aes_pos2offset__").unwrap()).unwrap();

        for v in (0..offsets.len()).map(|row| offsets.value(row)) {
            if expect_positive {
                assert!(v >= 0.0, "side={} pos2offset should be ≥0, got {}", side, v);
            } else {
                assert!(v <= 0.0, "side={} pos2offset should be ≤0, got {}", side, v);
            }
        }
    }
}
/// With `side = "right"` and the test layer's `dodge` setting left untouched
/// (presumably enabled by default — confirm against the parameter
/// definition), each row's offset must lie between its group's centre and
/// centre + 0.225, and the largest offset overall must not exceed 0.45.
#[test]
fn test_jitter_side_with_dodge_keeps_full_band_width() {
    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let mut layer = make_test_layer();
    layer.parameters.insert(
        String::from("side"),
        ParameterValue::String(String::from("right")),
    );

    let (adjusted, _) = Jitter
        .apply_adjustment(make_test_df(), &layer, &spec)
        .unwrap();
    let offsets = as_f64(adjusted.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();
    let fill = as_str(adjusted.column("__ggsql_aes_fill__").unwrap()).unwrap();

    // Track the running maximum while checking the per-row bounds.
    let mut max_offset = f64::MIN;
    for i in 0..offsets.len() {
        let v = offsets.value(i);
        let group = fill.value(i);
        // Expected centres: -0.225 for group "X", +0.225 for any other group.
        let center = if group == "X" { -0.225 } else { 0.225 };
        assert!(
            (v - center) >= -1e-9,
            "row {i} group {group}: offset {v} should be ≥ center {center}"
        );
        assert!(
            (v - center) <= 0.225 + 1e-9,
            "row {i} group {group}: offset {v} should be ≤ center + width/2"
        );
        max_offset = f64::max(max_offset, v);
    }

    assert!(
        max_offset <= 0.45 + 1e-9,
        "max offset {} should not exceed full-width upper bound 0.45",
        max_offset
    );
}
/// Combining `distribution = "normal"` with `side = "right"` (dodging off,
/// no partitioning) must still yield only non-negative pos1 offsets.
#[test]
fn test_jitter_side_normal_distribution() {
    let mut spec = Plot::new();
    spec.scales.push(make_discrete_scale("pos1"));
    spec.scales.push(make_continuous_scale("pos2"));

    let mut layer = make_test_layer();
    layer.partition_by = Vec::new();
    layer
        .parameters
        .insert(String::from("dodge"), ParameterValue::Boolean(false));
    layer.parameters.insert(
        String::from("distribution"),
        ParameterValue::String(String::from("normal")),
    );
    layer.parameters.insert(
        String::from("side"),
        ParameterValue::String(String::from("right")),
    );

    let (adjusted, _) = Jitter
        .apply_adjustment(make_test_df(), &layer, &spec)
        .unwrap();
    let offsets = as_f64(adjusted.column("__ggsql_aes_pos1offset__").unwrap()).unwrap();

    (0..offsets.len())
        .map(|row| offsets.value(row))
        .for_each(|value| {
            assert!(
                value >= 0.0,
                "normal+side=right should yield non-negative offsets"
            );
        });
}
}