#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DecimateMethod {
Lttb,
MinMax,
}
pub fn lttb(x: &[f64], y: &[f64], threshold: usize) -> Vec<usize> {
assert_eq!(x.len(), y.len(), "x and y must have the same length");
let n = x.len();
if threshold == 0 {
return vec![];
}
if n == 0 {
return vec![];
}
if threshold == 1 {
return vec![0];
}
if threshold >= n {
return (0..n).collect();
}
let valid: Vec<usize> = (0..n)
.filter(|&i| x[i].is_finite() && y[i].is_finite())
.collect();
let valid_n = valid.len();
if valid_n == 0 {
return vec![];
}
if threshold >= valid_n {
return valid;
}
if threshold == 1 {
return vec![valid[0]];
}
let mut selected = Vec::with_capacity(threshold);
selected.push(valid[0]);
let bucket_count = threshold - 2;
let interior = valid_n - 2;
let mut prev_selected_idx = 0usize;
for bucket_i in 0..bucket_count {
let bucket_start = 1 + (bucket_i * interior) / bucket_count;
let bucket_end = 1 + ((bucket_i + 1) * interior) / bucket_count;
let next_start = if bucket_i + 1 < bucket_count {
1 + ((bucket_i + 1) * interior) / bucket_count
} else {
valid_n - 1
};
let next_end = if bucket_i + 1 < bucket_count {
1 + ((bucket_i + 2) * interior) / bucket_count
} else {
valid_n
};
let next_count = (next_end - next_start) as f64;
let (avg_x, avg_y) = if next_count > 0.0 {
let mut sx = 0.0;
let mut sy = 0.0;
for &vi in &valid[next_start..next_end] {
sx += x[vi];
sy += y[vi];
}
(sx / next_count, sy / next_count)
} else {
let li = valid[valid_n - 1];
(x[li], y[li])
};
let prev_orig = valid[prev_selected_idx];
let (px, py) = (x[prev_orig], y[prev_orig]);
let mut best_area = -1.0_f64;
let mut best_valid_idx = bucket_start;
for (vi, &orig) in valid.iter().enumerate().skip(bucket_start).take(bucket_end - bucket_start) {
let area = triangle_area(px, py, x[orig], y[orig], avg_x, avg_y);
if area > best_area {
best_area = area;
best_valid_idx = vi;
}
}
selected.push(valid[best_valid_idx]);
prev_selected_idx = best_valid_idx;
}
selected.push(valid[valid_n - 1]);
selected
}
pub fn minmax(x: &[f64], y: &[f64], threshold: usize) -> Vec<usize> {
assert_eq!(x.len(), y.len(), "x and y must have the same length");
let n = x.len();
if threshold == 0 {
return vec![];
}
if n == 0 {
return vec![];
}
if threshold == 1 {
return vec![0];
}
if threshold >= n {
return (0..n).collect();
}
let valid: Vec<usize> = (0..n)
.filter(|&i| x[i].is_finite() && y[i].is_finite())
.collect();
let valid_n = valid.len();
if valid_n == 0 {
return vec![];
}
if threshold >= valid_n {
return valid;
}
let mut selected = Vec::with_capacity(threshold);
selected.push(valid[0]);
let pairs = (threshold - 2) / 2;
let bucket_count = if pairs == 0 { 1 } else { pairs };
let interior = valid_n - 2;
for bucket_i in 0..bucket_count {
let bucket_start = 1 + (bucket_i * interior) / bucket_count;
let bucket_end = 1 + ((bucket_i + 1) * interior) / bucket_count;
if bucket_start >= bucket_end {
continue;
}
let mut min_idx = bucket_start;
let mut max_idx = bucket_start;
let mut min_val = y[valid[bucket_start]];
let mut max_val = y[valid[bucket_start]];
for vi in bucket_start..bucket_end {
let yv = y[valid[vi]];
if yv < min_val {
min_val = yv;
min_idx = vi;
}
if yv > max_val {
max_val = yv;
max_idx = vi;
}
}
if min_idx == max_idx {
selected.push(valid[min_idx]);
} else if min_idx < max_idx {
selected.push(valid[min_idx]);
selected.push(valid[max_idx]);
} else {
selected.push(valid[max_idx]);
selected.push(valid[min_idx]);
}
}
let last = valid[valid_n - 1];
if selected.last() != Some(&last) {
selected.push(last);
}
selected
}
#[inline]
fn triangle_area(x_a: f64, y_a: f64, x_b: f64, y_b: f64, x_c: f64, y_c: f64) -> f64 {
(x_a * (y_b - y_c) + x_b * (y_c - y_a) + x_c * (y_a - y_b)).abs()
}
#[cfg(test)]
mod tests {
use super::*;
use std::f64::consts::PI;
#[test]
fn lttb_identity_when_threshold_ge_len() {
let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
let y = vec![0.0, 1.0, 4.0, 9.0, 16.0];
let indices = lttb(&x, &y, 5);
assert_eq!(indices, vec![0, 1, 2, 3, 4]);
let indices = lttb(&x, &y, 100);
assert_eq!(indices, vec![0, 1, 2, 3, 4]);
}
#[test]
fn lttb_first_and_last_always_preserved() {
let x: Vec<f64> = (0..100).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|v| v * v).collect();
let indices = lttb(&x, &y, 10);
assert_eq!(*indices.first().unwrap(), 0);
assert_eq!(*indices.last().unwrap(), 99);
assert_eq!(indices.len(), 10);
}
#[test]
fn lttb_threshold_2_returns_first_and_last() {
let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
let y = vec![10.0, 20.0, 30.0, 40.0, 50.0];
let indices = lttb(&x, &y, 2);
assert_eq!(indices, vec![0, 4]);
}
#[test]
fn lttb_threshold_3_returns_first_middle_last() {
let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
let y = vec![0.0, 5.0, 0.0, 5.0, 0.0];
let indices = lttb(&x, &y, 3);
assert_eq!(indices.len(), 3);
assert_eq!(indices[0], 0);
assert_eq!(*indices.last().unwrap(), 4);
}
#[test]
fn lttb_known_triangle_area() {
let area = triangle_area(0.0, 0.0, 1.0, 0.0, 0.0, 1.0);
assert!((area - 1.0).abs() < 1e-12);
let area = triangle_area(0.0, 0.0, 1.0, 1.0, 2.0, 2.0);
assert!(area.abs() < 1e-12);
}
#[test]
fn lttb_linear_data_any_subset_looks_identical() {
let n = 50;
let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|v| 2.0 * v + 1.0).collect();
let indices = lttb(&x, &y, 10);
assert_eq!(indices.len(), 10);
for &idx in &indices {
let expected = 2.0 * x[idx] + 1.0;
assert!(
(y[idx] - expected).abs() < 1e-12,
"point at index {} deviates from y = 2x + 1",
idx
);
}
}
#[test]
fn lttb_sinusoidal_peaks_preserved() {
let n = 1000;
let x: Vec<f64> = (0..n).map(|i| i as f64 * 2.0 * PI / 100.0).collect();
let y: Vec<f64> = x.iter().map(|v| v.sin()).collect();
let indices = lttb(&x, &y, 50);
let selected_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
let max_selected = selected_y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let min_selected = selected_y.iter().cloned().fold(f64::INFINITY, f64::min);
assert!(max_selected > 0.95, "peak not preserved: max = {}", max_selected);
assert!(min_selected < -0.95, "trough not preserved: min = {}", min_selected);
}
#[test]
fn lttb_single_point() {
let x = vec![42.0];
let y = vec![7.0];
let indices = lttb(&x, &y, 5);
assert_eq!(indices, vec![0]);
}
#[test]
fn lttb_two_points() {
let x = vec![1.0, 2.0];
let y = vec![3.0, 4.0];
let indices = lttb(&x, &y, 5);
assert_eq!(indices, vec![0, 1]);
}
#[test]
fn lttb_nan_handling() {
let x = vec![0.0, 1.0, f64::NAN, 3.0, 4.0, 5.0];
let y = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
let indices = lttb(&x, &y, 4);
assert!(!indices.contains(&2), "NaN index should not be selected");
assert_eq!(*indices.first().unwrap(), 0);
assert_eq!(*indices.last().unwrap(), 5);
}
#[test]
fn lttb_empty_input() {
let x: Vec<f64> = vec![];
let y: Vec<f64> = vec![];
let indices = lttb(&x, &y, 10);
assert!(indices.is_empty());
}
#[test]
fn lttb_threshold_zero() {
let x = vec![0.0, 1.0, 2.0];
let y = vec![0.0, 1.0, 2.0];
let indices = lttb(&x, &y, 0);
assert!(indices.is_empty());
}
#[test]
fn lttb_threshold_one() {
let x = vec![0.0, 1.0, 2.0];
let y = vec![0.0, 1.0, 2.0];
let indices = lttb(&x, &y, 1);
assert_eq!(indices, vec![0]);
}
#[test]
fn minmax_preserves_extremes() {
let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
let mut y: Vec<f64> = vec![0.0; 20];
y[5] = 100.0; y[15] = -100.0;
let indices = minmax(&x, &y, 10);
let selected_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
assert!(selected_y.contains(&100.0), "spike not preserved");
assert!(selected_y.contains(&-100.0), "dip not preserved");
}
#[test]
fn minmax_identity_when_threshold_ge_len() {
let x = vec![0.0, 1.0, 2.0, 3.0];
let y = vec![1.0, 2.0, 3.0, 4.0];
let indices = minmax(&x, &y, 10);
assert_eq!(indices, vec![0, 1, 2, 3]);
}
#[test]
fn minmax_first_and_last_preserved() {
let x: Vec<f64> = (0..50).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|v| v.sin()).collect();
let indices = minmax(&x, &y, 10);
assert_eq!(*indices.first().unwrap(), 0);
assert_eq!(*indices.last().unwrap(), 49);
}
#[test]
fn minmax_empty_input() {
let indices = minmax(&[], &[], 5);
assert!(indices.is_empty());
}
#[test]
fn minmax_threshold_zero() {
let indices = minmax(&[1.0, 2.0], &[3.0, 4.0], 0);
assert!(indices.is_empty());
}
#[test]
fn minmax_nan_handling() {
let x = vec![0.0, 1.0, f64::NAN, 3.0, 4.0];
let y = vec![0.0, 10.0, 5.0, -10.0, 0.0];
let indices = minmax(&x, &y, 4);
assert!(!indices.contains(&2), "NaN index should not be selected");
}
#[test]
fn lttb_large_dataset_smoke() {
let n = 100_000;
let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|v| (v * 0.01).sin() + (v * 0.1).cos()).collect();
let threshold = 500;
let indices = lttb(&x, &y, threshold);
assert_eq!(indices.len(), threshold);
assert_eq!(indices[0], 0);
assert_eq!(*indices.last().unwrap(), n - 1);
for w in indices.windows(2) {
assert!(w[0] < w[1], "indices must be strictly increasing");
}
}
#[test]
fn minmax_large_dataset_smoke() {
let n = 100_000;
let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|v| (v * 0.01).sin()).collect();
let threshold = 500;
let indices = minmax(&x, &y, threshold);
assert!(indices.len() <= threshold);
assert_eq!(indices[0], 0);
assert_eq!(*indices.last().unwrap(), n - 1);
for w in indices.windows(2) {
assert!(w[0] < w[1], "indices must be strictly increasing");
}
}
#[test]
fn lttb_bucket_boundaries_no_gaps_or_overlaps() {
let n = 12;
let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
let y: Vec<f64> = x.clone();
let threshold = 6;
let indices = lttb(&x, &y, threshold);
assert_eq!(indices.len(), threshold);
assert_eq!(indices[0], 0);
assert_eq!(*indices.last().unwrap(), n as usize - 1);
for &idx in &indices {
assert!(idx < n as usize);
}
}
#[test]
fn lttb_all_nan_returns_empty() {
let x = vec![f64::NAN; 5];
let y = vec![f64::NAN; 5];
let indices = lttb(&x, &y, 3);
assert!(indices.is_empty());
}
#[test]
fn minmax_all_nan_returns_empty() {
let x = vec![f64::NAN; 5];
let y = vec![f64::NAN; 5];
let indices = minmax(&x, &y, 3);
assert!(indices.is_empty());
}
#[test]
fn lttb_indices_are_sorted() {
let n = 200;
let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
let y: Vec<f64> = x.iter().map(|v| (v * 0.1).sin()).collect();
let indices = lttb(&x, &y, 20);
for w in indices.windows(2) {
assert!(w[0] < w[1], "LTTB indices must be strictly increasing");
}
}
}