plotkit_core/
decimate.rs

1//! Downsampling algorithms for large datasets.
2//!
3//! When plotting millions of data points, rendering every single point is
4//! wasteful: a typical screen has at most a few thousand horizontal pixels,
5//! so most points overlap and contribute nothing to the visual output.
6//! Decimation reduces the point count to a manageable size while preserving
7//! the perceived shape of the data.
8//!
9//! Two algorithms are provided:
10//!
11//! - **LTTB** (Largest Triangle Three Buckets) — the gold standard for
12//!   perceptually faithful line downsampling. Based on the 2013 paper by
13//!   Sveinn Steinarsson.
14//! - **MinMax** — a faster alternative that keeps the min and max y-value
15//!   in each bucket, ensuring peaks and troughs are never lost.
16
17/// The decimation strategy to apply when rendering a line.
18#[derive(Debug, Clone, Copy, PartialEq)]
19pub enum DecimateMethod {
20    /// Largest Triangle Three Buckets algorithm (best visual fidelity).
21    Lttb,
22    /// Min-max decimation (fastest, preserves extremes).
23    MinMax,
24}
25
26/// Downsamples a series of (x, y) points to `threshold` points using the
27/// Largest Triangle Three Buckets algorithm.
28///
29/// LTTB preserves the visual shape of the line while dramatically reducing
30/// point count. A 1M-point series decimated to 1000 points is visually
31/// indistinguishable from the original at screen resolution.
32///
33/// Returns indices into the original arrays. The first and last points
34/// are always preserved.
35///
36/// # Panics
37///
38/// Panics if `x.len() != y.len()`.
39pub fn lttb(x: &[f64], y: &[f64], threshold: usize) -> Vec<usize> {
40    assert_eq!(x.len(), y.len(), "x and y must have the same length");
41
42    let n = x.len();
43
44    // Edge cases: nothing to decimate.
45    if threshold == 0 {
46        return vec![];
47    }
48    if n == 0 {
49        return vec![];
50    }
51    if threshold == 1 {
52        // Return only the first point.
53        return vec![0];
54    }
55    if threshold >= n {
56        return (0..n).collect();
57    }
58
59    // Filter out NaN points, keeping track of original indices.
60    let valid: Vec<usize> = (0..n)
61        .filter(|&i| x[i].is_finite() && y[i].is_finite())
62        .collect();
63    let valid_n = valid.len();
64
65    if valid_n == 0 {
66        return vec![];
67    }
68    if threshold >= valid_n {
69        return valid;
70    }
71    if threshold == 1 {
72        return vec![valid[0]];
73    }
74
75    let mut selected = Vec::with_capacity(threshold);
76
77    // Always select the first valid point.
78    selected.push(valid[0]);
79
80    let bucket_count = threshold - 2;
81    // Remaining points to distribute across buckets (excluding first and last).
82    let interior = valid_n - 2;
83
84    let mut prev_selected_idx = 0usize; // index into `valid`
85
86    for bucket_i in 0..bucket_count {
87        // Current bucket range in `valid` indices (1-based, skipping first point).
88        let bucket_start = 1 + (bucket_i * interior) / bucket_count;
89        let bucket_end = 1 + ((bucket_i + 1) * interior) / bucket_count;
90
91        // Next bucket range (or last point if this is the last bucket).
92        let next_start = if bucket_i + 1 < bucket_count {
93            1 + ((bucket_i + 1) * interior) / bucket_count
94        } else {
95            valid_n - 1
96        };
97        let next_end = if bucket_i + 1 < bucket_count {
98            1 + ((bucket_i + 2) * interior) / bucket_count
99        } else {
100            valid_n
101        };
102
103        // Compute average point of the next bucket.
104        let next_count = (next_end - next_start) as f64;
105        let (avg_x, avg_y) = if next_count > 0.0 {
106            let mut sx = 0.0;
107            let mut sy = 0.0;
108            for &vi in &valid[next_start..next_end] {
109                sx += x[vi];
110                sy += y[vi];
111            }
112            (sx / next_count, sy / next_count)
113        } else {
114            // Fallback: use the last point.
115            let li = valid[valid_n - 1];
116            (x[li], y[li])
117        };
118
119        // Previously selected point coordinates.
120        let prev_orig = valid[prev_selected_idx];
121        let (px, py) = (x[prev_orig], y[prev_orig]);
122
123        // Find the point in the current bucket that maximizes triangle area.
124        let mut best_area = -1.0_f64;
125        let mut best_valid_idx = bucket_start;
126
127        for (vi, &orig) in valid.iter().enumerate().skip(bucket_start).take(bucket_end - bucket_start) {
128            let area = triangle_area(px, py, x[orig], y[orig], avg_x, avg_y);
129            if area > best_area {
130                best_area = area;
131                best_valid_idx = vi;
132            }
133        }
134
135        selected.push(valid[best_valid_idx]);
136        prev_selected_idx = best_valid_idx;
137    }
138
139    // Always select the last valid point.
140    selected.push(valid[valid_n - 1]);
141
142    selected
143}
144
145/// Min-max decimation: keeps the min and max y-value in each bucket.
146/// Faster than LTTB, good for dense time series where peaks matter.
147///
148/// Returns indices into the original arrays. The first and last points
149/// are always preserved. Within each bucket, the min and max points are
150/// returned in their original order (preserving temporal coherence).
151///
152/// # Panics
153///
154/// Panics if `x.len() != y.len()`.
155pub fn minmax(x: &[f64], y: &[f64], threshold: usize) -> Vec<usize> {
156    assert_eq!(x.len(), y.len(), "x and y must have the same length");
157
158    let n = x.len();
159
160    if threshold == 0 {
161        return vec![];
162    }
163    if n == 0 {
164        return vec![];
165    }
166    if threshold == 1 {
167        return vec![0];
168    }
169    if threshold >= n {
170        return (0..n).collect();
171    }
172
173    // Filter out NaN points.
174    let valid: Vec<usize> = (0..n)
175        .filter(|&i| x[i].is_finite() && y[i].is_finite())
176        .collect();
177    let valid_n = valid.len();
178
179    if valid_n == 0 {
180        return vec![];
181    }
182    if threshold >= valid_n {
183        return valid;
184    }
185
186    let mut selected = Vec::with_capacity(threshold);
187
188    // Always select the first valid point.
189    selected.push(valid[0]);
190
191    // Each bucket produces two points (min and max), so we need
192    // (threshold - 2) / 2 buckets. Handle odd thresholds gracefully.
193    let pairs = (threshold - 2) / 2;
194    let bucket_count = if pairs == 0 { 1 } else { pairs };
195    let interior = valid_n - 2; // points between first and last
196
197    for bucket_i in 0..bucket_count {
198        let bucket_start = 1 + (bucket_i * interior) / bucket_count;
199        let bucket_end = 1 + ((bucket_i + 1) * interior) / bucket_count;
200
201        if bucket_start >= bucket_end {
202            continue;
203        }
204
205        let mut min_idx = bucket_start;
206        let mut max_idx = bucket_start;
207        let mut min_val = y[valid[bucket_start]];
208        let mut max_val = y[valid[bucket_start]];
209
210        for vi in bucket_start..bucket_end {
211            let yv = y[valid[vi]];
212            if yv < min_val {
213                min_val = yv;
214                min_idx = vi;
215            }
216            if yv > max_val {
217                max_val = yv;
218                max_idx = vi;
219            }
220        }
221
222        // Add min and max in original order.
223        if min_idx == max_idx {
224            selected.push(valid[min_idx]);
225        } else if min_idx < max_idx {
226            selected.push(valid[min_idx]);
227            selected.push(valid[max_idx]);
228        } else {
229            selected.push(valid[max_idx]);
230            selected.push(valid[min_idx]);
231        }
232    }
233
234    // Always select the last valid point.
235    let last = valid[valid_n - 1];
236    // Avoid duplicating the last point if it was already selected.
237    if selected.last() != Some(&last) {
238        selected.push(last);
239    }
240
241    selected
242}
243
244/// Computes twice the area of the triangle formed by three points.
245///
246/// Using the shoelace formula:
247///   area = |x_a(y_b - y_c) + x_b(y_c - y_a) + x_c(y_a - y_b)| / 2
248///
249/// We return the value without the `/2` since we only need relative
250/// comparisons and avoiding the division is faster.
251#[inline]
252fn triangle_area(x_a: f64, y_a: f64, x_b: f64, y_b: f64, x_c: f64, y_c: f64) -> f64 {
253    (x_a * (y_b - y_c) + x_b * (y_c - y_a) + x_c * (y_a - y_b)).abs()
254}
255
256// ---------------------------------------------------------------------------
257// Tests
258// ---------------------------------------------------------------------------
259
260#[cfg(test)]
261mod tests {
262    use super::*;
263    use std::f64::consts::PI;
264
265    // -- LTTB tests --------------------------------------------------------
266
267    #[test]
268    fn lttb_identity_when_threshold_ge_len() {
269        let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
270        let y = vec![0.0, 1.0, 4.0, 9.0, 16.0];
271        let indices = lttb(&x, &y, 5);
272        assert_eq!(indices, vec![0, 1, 2, 3, 4]);
273
274        let indices = lttb(&x, &y, 100);
275        assert_eq!(indices, vec![0, 1, 2, 3, 4]);
276    }
277
278    #[test]
279    fn lttb_first_and_last_always_preserved() {
280        let x: Vec<f64> = (0..100).map(|i| i as f64).collect();
281        let y: Vec<f64> = x.iter().map(|v| v * v).collect();
282        let indices = lttb(&x, &y, 10);
283        assert_eq!(*indices.first().unwrap(), 0);
284        assert_eq!(*indices.last().unwrap(), 99);
285        assert_eq!(indices.len(), 10);
286    }
287
288    #[test]
289    fn lttb_threshold_2_returns_first_and_last() {
290        let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
291        let y = vec![10.0, 20.0, 30.0, 40.0, 50.0];
292        let indices = lttb(&x, &y, 2);
293        assert_eq!(indices, vec![0, 4]);
294    }
295
296    #[test]
297    fn lttb_threshold_3_returns_first_middle_last() {
298        let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
299        let y = vec![0.0, 5.0, 0.0, 5.0, 0.0];
300        let indices = lttb(&x, &y, 3);
301        assert_eq!(indices.len(), 3);
302        assert_eq!(indices[0], 0);
303        assert_eq!(*indices.last().unwrap(), 4);
304    }
305
306    #[test]
307    fn lttb_known_triangle_area() {
308        // Triangle with vertices (0,0), (1,0), (0,1) has area 0.5.
309        // Our internal function returns 2*area = 1.0.
310        let area = triangle_area(0.0, 0.0, 1.0, 0.0, 0.0, 1.0);
311        assert!((area - 1.0).abs() < 1e-12);
312
313        // Degenerate (collinear) triangle has area 0.
314        let area = triangle_area(0.0, 0.0, 1.0, 1.0, 2.0, 2.0);
315        assert!(area.abs() < 1e-12);
316    }
317
318    #[test]
319    fn lttb_linear_data_any_subset_looks_identical() {
320        // For perfectly linear data y = 2x + 1, all triangles have zero area.
321        // LTTB should still return `threshold` points, all lying on the line.
322        let n = 50;
323        let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
324        let y: Vec<f64> = x.iter().map(|v| 2.0 * v + 1.0).collect();
325        let indices = lttb(&x, &y, 10);
326        assert_eq!(indices.len(), 10);
327
328        // All selected points should satisfy y = 2x + 1.
329        for &idx in &indices {
330            let expected = 2.0 * x[idx] + 1.0;
331            assert!(
332                (y[idx] - expected).abs() < 1e-12,
333                "point at index {} deviates from y = 2x + 1",
334                idx
335            );
336        }
337    }
338
339    #[test]
340    fn lttb_sinusoidal_peaks_preserved() {
341        // Generate a sine wave and verify that LTTB keeps the peaks.
342        let n = 1000;
343        let x: Vec<f64> = (0..n).map(|i| i as f64 * 2.0 * PI / 100.0).collect();
344        let y: Vec<f64> = x.iter().map(|v| v.sin()).collect();
345
346        let indices = lttb(&x, &y, 50);
347
348        // The global max (~1.0) and min (~-1.0) should be close to the selected set.
349        let selected_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
350        let max_selected = selected_y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
351        let min_selected = selected_y.iter().cloned().fold(f64::INFINITY, f64::min);
352
353        assert!(max_selected > 0.95, "peak not preserved: max = {}", max_selected);
354        assert!(min_selected < -0.95, "trough not preserved: min = {}", min_selected);
355    }
356
357    #[test]
358    fn lttb_single_point() {
359        let x = vec![42.0];
360        let y = vec![7.0];
361        let indices = lttb(&x, &y, 5);
362        assert_eq!(indices, vec![0]);
363    }
364
365    #[test]
366    fn lttb_two_points() {
367        let x = vec![1.0, 2.0];
368        let y = vec![3.0, 4.0];
369        let indices = lttb(&x, &y, 5);
370        assert_eq!(indices, vec![0, 1]);
371    }
372
373    #[test]
374    fn lttb_nan_handling() {
375        let x = vec![0.0, 1.0, f64::NAN, 3.0, 4.0, 5.0];
376        let y = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
377        let indices = lttb(&x, &y, 4);
378        // NaN at index 2 should be skipped entirely.
379        assert!(!indices.contains(&2), "NaN index should not be selected");
380        // First valid (0) and last valid (5) should be present.
381        assert_eq!(*indices.first().unwrap(), 0);
382        assert_eq!(*indices.last().unwrap(), 5);
383    }
384
385    #[test]
386    fn lttb_empty_input() {
387        let x: Vec<f64> = vec![];
388        let y: Vec<f64> = vec![];
389        let indices = lttb(&x, &y, 10);
390        assert!(indices.is_empty());
391    }
392
393    #[test]
394    fn lttb_threshold_zero() {
395        let x = vec![0.0, 1.0, 2.0];
396        let y = vec![0.0, 1.0, 2.0];
397        let indices = lttb(&x, &y, 0);
398        assert!(indices.is_empty());
399    }
400
401    #[test]
402    fn lttb_threshold_one() {
403        let x = vec![0.0, 1.0, 2.0];
404        let y = vec![0.0, 1.0, 2.0];
405        let indices = lttb(&x, &y, 1);
406        assert_eq!(indices, vec![0]);
407    }
408
409    // -- MinMax tests ------------------------------------------------------
410
411    #[test]
412    fn minmax_preserves_extremes() {
413        // Data with a clear spike and dip.
414        let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
415        let mut y: Vec<f64> = vec![0.0; 20];
416        y[5] = 100.0;  // spike
417        y[15] = -100.0; // dip
418
419        let indices = minmax(&x, &y, 10);
420        let selected_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
421
422        assert!(selected_y.contains(&100.0), "spike not preserved");
423        assert!(selected_y.contains(&-100.0), "dip not preserved");
424    }
425
426    #[test]
427    fn minmax_identity_when_threshold_ge_len() {
428        let x = vec![0.0, 1.0, 2.0, 3.0];
429        let y = vec![1.0, 2.0, 3.0, 4.0];
430        let indices = minmax(&x, &y, 10);
431        assert_eq!(indices, vec![0, 1, 2, 3]);
432    }
433
434    #[test]
435    fn minmax_first_and_last_preserved() {
436        let x: Vec<f64> = (0..50).map(|i| i as f64).collect();
437        let y: Vec<f64> = x.iter().map(|v| v.sin()).collect();
438        let indices = minmax(&x, &y, 10);
439        assert_eq!(*indices.first().unwrap(), 0);
440        assert_eq!(*indices.last().unwrap(), 49);
441    }
442
443    #[test]
444    fn minmax_empty_input() {
445        let indices = minmax(&[], &[], 5);
446        assert!(indices.is_empty());
447    }
448
449    #[test]
450    fn minmax_threshold_zero() {
451        let indices = minmax(&[1.0, 2.0], &[3.0, 4.0], 0);
452        assert!(indices.is_empty());
453    }
454
455    #[test]
456    fn minmax_nan_handling() {
457        let x = vec![0.0, 1.0, f64::NAN, 3.0, 4.0];
458        let y = vec![0.0, 10.0, 5.0, -10.0, 0.0];
459        let indices = minmax(&x, &y, 4);
460        assert!(!indices.contains(&2), "NaN index should not be selected");
461    }
462
463    // -- Large synthetic dataset smoke test --------------------------------
464
465    #[test]
466    fn lttb_large_dataset_smoke() {
467        let n = 100_000;
468        let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
469        let y: Vec<f64> = x.iter().map(|v| (v * 0.01).sin() + (v * 0.1).cos()).collect();
470
471        let threshold = 500;
472        let indices = lttb(&x, &y, threshold);
473
474        assert_eq!(indices.len(), threshold);
475        assert_eq!(indices[0], 0);
476        assert_eq!(*indices.last().unwrap(), n - 1);
477
478        // Indices should be strictly increasing (no duplicates, monotonic).
479        for w in indices.windows(2) {
480            assert!(w[0] < w[1], "indices must be strictly increasing");
481        }
482    }
483
484    #[test]
485    fn minmax_large_dataset_smoke() {
486        let n = 100_000;
487        let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
488        let y: Vec<f64> = x.iter().map(|v| (v * 0.01).sin()).collect();
489
490        let threshold = 500;
491        let indices = minmax(&x, &y, threshold);
492
493        assert!(indices.len() <= threshold);
494        assert_eq!(indices[0], 0);
495        assert_eq!(*indices.last().unwrap(), n - 1);
496
497        // Indices should be strictly increasing.
498        for w in indices.windows(2) {
499            assert!(w[0] < w[1], "indices must be strictly increasing");
500        }
501    }
502
503    // -- Bucket boundary correctness ---------------------------------------
504
505    #[test]
506    fn lttb_bucket_boundaries_no_gaps_or_overlaps() {
507        // With 12 points and threshold=6, we have 4 buckets over 10 interior
508        // points. Verify every interior point is in exactly one bucket.
509        let n = 12;
510        let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
511        let y: Vec<f64> = x.clone();
512
513        let threshold = 6;
514        let indices = lttb(&x, &y, threshold);
515
516        assert_eq!(indices.len(), threshold);
517        assert_eq!(indices[0], 0);
518        assert_eq!(*indices.last().unwrap(), n as usize - 1);
519
520        // All selected indices should be within range.
521        for &idx in &indices {
522            assert!(idx < n as usize);
523        }
524    }
525
526    #[test]
527    fn lttb_all_nan_returns_empty() {
528        let x = vec![f64::NAN; 5];
529        let y = vec![f64::NAN; 5];
530        let indices = lttb(&x, &y, 3);
531        assert!(indices.is_empty());
532    }
533
534    #[test]
535    fn minmax_all_nan_returns_empty() {
536        let x = vec![f64::NAN; 5];
537        let y = vec![f64::NAN; 5];
538        let indices = minmax(&x, &y, 3);
539        assert!(indices.is_empty());
540    }
541
542    #[test]
543    fn lttb_indices_are_sorted() {
544        let n = 200;
545        let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
546        let y: Vec<f64> = x.iter().map(|v| (v * 0.1).sin()).collect();
547        let indices = lttb(&x, &y, 20);
548        for w in indices.windows(2) {
549            assert!(w[0] < w[1], "LTTB indices must be strictly increasing");
550        }
551    }
552}
plotkit_core/decimate.rs

plotkit_core/
decimate.rs