1#[derive(Debug, Clone, Copy, PartialEq)]
19pub enum DecimateMethod {
20 Lttb,
22 MinMax,
24}
25
26pub fn lttb(x: &[f64], y: &[f64], threshold: usize) -> Vec<usize> {
40 assert_eq!(x.len(), y.len(), "x and y must have the same length");
41
42 let n = x.len();
43
44 if threshold == 0 {
46 return vec![];
47 }
48 if n == 0 {
49 return vec![];
50 }
51 if threshold == 1 {
52 return vec![0];
54 }
55 if threshold >= n {
56 return (0..n).collect();
57 }
58
59 let valid: Vec<usize> = (0..n)
61 .filter(|&i| x[i].is_finite() && y[i].is_finite())
62 .collect();
63 let valid_n = valid.len();
64
65 if valid_n == 0 {
66 return vec![];
67 }
68 if threshold >= valid_n {
69 return valid;
70 }
71 if threshold == 1 {
72 return vec![valid[0]];
73 }
74
75 let mut selected = Vec::with_capacity(threshold);
76
77 selected.push(valid[0]);
79
80 let bucket_count = threshold - 2;
81 let interior = valid_n - 2;
83
84 let mut prev_selected_idx = 0usize; for bucket_i in 0..bucket_count {
87 let bucket_start = 1 + (bucket_i * interior) / bucket_count;
89 let bucket_end = 1 + ((bucket_i + 1) * interior) / bucket_count;
90
91 let next_start = if bucket_i + 1 < bucket_count {
93 1 + ((bucket_i + 1) * interior) / bucket_count
94 } else {
95 valid_n - 1
96 };
97 let next_end = if bucket_i + 1 < bucket_count {
98 1 + ((bucket_i + 2) * interior) / bucket_count
99 } else {
100 valid_n
101 };
102
103 let next_count = (next_end - next_start) as f64;
105 let (avg_x, avg_y) = if next_count > 0.0 {
106 let mut sx = 0.0;
107 let mut sy = 0.0;
108 for &vi in &valid[next_start..next_end] {
109 sx += x[vi];
110 sy += y[vi];
111 }
112 (sx / next_count, sy / next_count)
113 } else {
114 let li = valid[valid_n - 1];
116 (x[li], y[li])
117 };
118
119 let prev_orig = valid[prev_selected_idx];
121 let (px, py) = (x[prev_orig], y[prev_orig]);
122
123 let mut best_area = -1.0_f64;
125 let mut best_valid_idx = bucket_start;
126
127 for (vi, &orig) in valid.iter().enumerate().skip(bucket_start).take(bucket_end - bucket_start) {
128 let area = triangle_area(px, py, x[orig], y[orig], avg_x, avg_y);
129 if area > best_area {
130 best_area = area;
131 best_valid_idx = vi;
132 }
133 }
134
135 selected.push(valid[best_valid_idx]);
136 prev_selected_idx = best_valid_idx;
137 }
138
139 selected.push(valid[valid_n - 1]);
141
142 selected
143}
144
145pub fn minmax(x: &[f64], y: &[f64], threshold: usize) -> Vec<usize> {
156 assert_eq!(x.len(), y.len(), "x and y must have the same length");
157
158 let n = x.len();
159
160 if threshold == 0 {
161 return vec![];
162 }
163 if n == 0 {
164 return vec![];
165 }
166 if threshold == 1 {
167 return vec![0];
168 }
169 if threshold >= n {
170 return (0..n).collect();
171 }
172
173 let valid: Vec<usize> = (0..n)
175 .filter(|&i| x[i].is_finite() && y[i].is_finite())
176 .collect();
177 let valid_n = valid.len();
178
179 if valid_n == 0 {
180 return vec![];
181 }
182 if threshold >= valid_n {
183 return valid;
184 }
185
186 let mut selected = Vec::with_capacity(threshold);
187
188 selected.push(valid[0]);
190
191 let pairs = (threshold - 2) / 2;
194 let bucket_count = if pairs == 0 { 1 } else { pairs };
195 let interior = valid_n - 2; for bucket_i in 0..bucket_count {
198 let bucket_start = 1 + (bucket_i * interior) / bucket_count;
199 let bucket_end = 1 + ((bucket_i + 1) * interior) / bucket_count;
200
201 if bucket_start >= bucket_end {
202 continue;
203 }
204
205 let mut min_idx = bucket_start;
206 let mut max_idx = bucket_start;
207 let mut min_val = y[valid[bucket_start]];
208 let mut max_val = y[valid[bucket_start]];
209
210 for vi in bucket_start..bucket_end {
211 let yv = y[valid[vi]];
212 if yv < min_val {
213 min_val = yv;
214 min_idx = vi;
215 }
216 if yv > max_val {
217 max_val = yv;
218 max_idx = vi;
219 }
220 }
221
222 if min_idx == max_idx {
224 selected.push(valid[min_idx]);
225 } else if min_idx < max_idx {
226 selected.push(valid[min_idx]);
227 selected.push(valid[max_idx]);
228 } else {
229 selected.push(valid[max_idx]);
230 selected.push(valid[min_idx]);
231 }
232 }
233
234 let last = valid[valid_n - 1];
236 if selected.last() != Some(&last) {
238 selected.push(last);
239 }
240
241 selected
242}
243
244#[inline]
252fn triangle_area(x_a: f64, y_a: f64, x_b: f64, y_b: f64, x_c: f64, y_c: f64) -> f64 {
253 (x_a * (y_b - y_c) + x_b * (y_c - y_a) + x_c * (y_a - y_b)).abs()
254}
255
256#[cfg(test)]
261mod tests {
262 use super::*;
263 use std::f64::consts::PI;
264
265 #[test]
268 fn lttb_identity_when_threshold_ge_len() {
269 let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
270 let y = vec![0.0, 1.0, 4.0, 9.0, 16.0];
271 let indices = lttb(&x, &y, 5);
272 assert_eq!(indices, vec![0, 1, 2, 3, 4]);
273
274 let indices = lttb(&x, &y, 100);
275 assert_eq!(indices, vec![0, 1, 2, 3, 4]);
276 }
277
278 #[test]
279 fn lttb_first_and_last_always_preserved() {
280 let x: Vec<f64> = (0..100).map(|i| i as f64).collect();
281 let y: Vec<f64> = x.iter().map(|v| v * v).collect();
282 let indices = lttb(&x, &y, 10);
283 assert_eq!(*indices.first().unwrap(), 0);
284 assert_eq!(*indices.last().unwrap(), 99);
285 assert_eq!(indices.len(), 10);
286 }
287
288 #[test]
289 fn lttb_threshold_2_returns_first_and_last() {
290 let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
291 let y = vec![10.0, 20.0, 30.0, 40.0, 50.0];
292 let indices = lttb(&x, &y, 2);
293 assert_eq!(indices, vec![0, 4]);
294 }
295
296 #[test]
297 fn lttb_threshold_3_returns_first_middle_last() {
298 let x = vec![0.0, 1.0, 2.0, 3.0, 4.0];
299 let y = vec![0.0, 5.0, 0.0, 5.0, 0.0];
300 let indices = lttb(&x, &y, 3);
301 assert_eq!(indices.len(), 3);
302 assert_eq!(indices[0], 0);
303 assert_eq!(*indices.last().unwrap(), 4);
304 }
305
306 #[test]
307 fn lttb_known_triangle_area() {
308 let area = triangle_area(0.0, 0.0, 1.0, 0.0, 0.0, 1.0);
311 assert!((area - 1.0).abs() < 1e-12);
312
313 let area = triangle_area(0.0, 0.0, 1.0, 1.0, 2.0, 2.0);
315 assert!(area.abs() < 1e-12);
316 }
317
318 #[test]
319 fn lttb_linear_data_any_subset_looks_identical() {
320 let n = 50;
323 let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
324 let y: Vec<f64> = x.iter().map(|v| 2.0 * v + 1.0).collect();
325 let indices = lttb(&x, &y, 10);
326 assert_eq!(indices.len(), 10);
327
328 for &idx in &indices {
330 let expected = 2.0 * x[idx] + 1.0;
331 assert!(
332 (y[idx] - expected).abs() < 1e-12,
333 "point at index {} deviates from y = 2x + 1",
334 idx
335 );
336 }
337 }
338
339 #[test]
340 fn lttb_sinusoidal_peaks_preserved() {
341 let n = 1000;
343 let x: Vec<f64> = (0..n).map(|i| i as f64 * 2.0 * PI / 100.0).collect();
344 let y: Vec<f64> = x.iter().map(|v| v.sin()).collect();
345
346 let indices = lttb(&x, &y, 50);
347
348 let selected_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
350 let max_selected = selected_y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
351 let min_selected = selected_y.iter().cloned().fold(f64::INFINITY, f64::min);
352
353 assert!(max_selected > 0.95, "peak not preserved: max = {}", max_selected);
354 assert!(min_selected < -0.95, "trough not preserved: min = {}", min_selected);
355 }
356
357 #[test]
358 fn lttb_single_point() {
359 let x = vec![42.0];
360 let y = vec![7.0];
361 let indices = lttb(&x, &y, 5);
362 assert_eq!(indices, vec![0]);
363 }
364
365 #[test]
366 fn lttb_two_points() {
367 let x = vec![1.0, 2.0];
368 let y = vec![3.0, 4.0];
369 let indices = lttb(&x, &y, 5);
370 assert_eq!(indices, vec![0, 1]);
371 }
372
373 #[test]
374 fn lttb_nan_handling() {
375 let x = vec![0.0, 1.0, f64::NAN, 3.0, 4.0, 5.0];
376 let y = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0];
377 let indices = lttb(&x, &y, 4);
378 assert!(!indices.contains(&2), "NaN index should not be selected");
380 assert_eq!(*indices.first().unwrap(), 0);
382 assert_eq!(*indices.last().unwrap(), 5);
383 }
384
385 #[test]
386 fn lttb_empty_input() {
387 let x: Vec<f64> = vec![];
388 let y: Vec<f64> = vec![];
389 let indices = lttb(&x, &y, 10);
390 assert!(indices.is_empty());
391 }
392
393 #[test]
394 fn lttb_threshold_zero() {
395 let x = vec![0.0, 1.0, 2.0];
396 let y = vec![0.0, 1.0, 2.0];
397 let indices = lttb(&x, &y, 0);
398 assert!(indices.is_empty());
399 }
400
401 #[test]
402 fn lttb_threshold_one() {
403 let x = vec![0.0, 1.0, 2.0];
404 let y = vec![0.0, 1.0, 2.0];
405 let indices = lttb(&x, &y, 1);
406 assert_eq!(indices, vec![0]);
407 }
408
409 #[test]
412 fn minmax_preserves_extremes() {
413 let x: Vec<f64> = (0..20).map(|i| i as f64).collect();
415 let mut y: Vec<f64> = vec![0.0; 20];
416 y[5] = 100.0; y[15] = -100.0; let indices = minmax(&x, &y, 10);
420 let selected_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
421
422 assert!(selected_y.contains(&100.0), "spike not preserved");
423 assert!(selected_y.contains(&-100.0), "dip not preserved");
424 }
425
426 #[test]
427 fn minmax_identity_when_threshold_ge_len() {
428 let x = vec![0.0, 1.0, 2.0, 3.0];
429 let y = vec![1.0, 2.0, 3.0, 4.0];
430 let indices = minmax(&x, &y, 10);
431 assert_eq!(indices, vec![0, 1, 2, 3]);
432 }
433
434 #[test]
435 fn minmax_first_and_last_preserved() {
436 let x: Vec<f64> = (0..50).map(|i| i as f64).collect();
437 let y: Vec<f64> = x.iter().map(|v| v.sin()).collect();
438 let indices = minmax(&x, &y, 10);
439 assert_eq!(*indices.first().unwrap(), 0);
440 assert_eq!(*indices.last().unwrap(), 49);
441 }
442
443 #[test]
444 fn minmax_empty_input() {
445 let indices = minmax(&[], &[], 5);
446 assert!(indices.is_empty());
447 }
448
449 #[test]
450 fn minmax_threshold_zero() {
451 let indices = minmax(&[1.0, 2.0], &[3.0, 4.0], 0);
452 assert!(indices.is_empty());
453 }
454
455 #[test]
456 fn minmax_nan_handling() {
457 let x = vec![0.0, 1.0, f64::NAN, 3.0, 4.0];
458 let y = vec![0.0, 10.0, 5.0, -10.0, 0.0];
459 let indices = minmax(&x, &y, 4);
460 assert!(!indices.contains(&2), "NaN index should not be selected");
461 }
462
463 #[test]
466 fn lttb_large_dataset_smoke() {
467 let n = 100_000;
468 let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
469 let y: Vec<f64> = x.iter().map(|v| (v * 0.01).sin() + (v * 0.1).cos()).collect();
470
471 let threshold = 500;
472 let indices = lttb(&x, &y, threshold);
473
474 assert_eq!(indices.len(), threshold);
475 assert_eq!(indices[0], 0);
476 assert_eq!(*indices.last().unwrap(), n - 1);
477
478 for w in indices.windows(2) {
480 assert!(w[0] < w[1], "indices must be strictly increasing");
481 }
482 }
483
484 #[test]
485 fn minmax_large_dataset_smoke() {
486 let n = 100_000;
487 let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
488 let y: Vec<f64> = x.iter().map(|v| (v * 0.01).sin()).collect();
489
490 let threshold = 500;
491 let indices = minmax(&x, &y, threshold);
492
493 assert!(indices.len() <= threshold);
494 assert_eq!(indices[0], 0);
495 assert_eq!(*indices.last().unwrap(), n - 1);
496
497 for w in indices.windows(2) {
499 assert!(w[0] < w[1], "indices must be strictly increasing");
500 }
501 }
502
503 #[test]
506 fn lttb_bucket_boundaries_no_gaps_or_overlaps() {
507 let n = 12;
510 let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
511 let y: Vec<f64> = x.clone();
512
513 let threshold = 6;
514 let indices = lttb(&x, &y, threshold);
515
516 assert_eq!(indices.len(), threshold);
517 assert_eq!(indices[0], 0);
518 assert_eq!(*indices.last().unwrap(), n as usize - 1);
519
520 for &idx in &indices {
522 assert!(idx < n as usize);
523 }
524 }
525
526 #[test]
527 fn lttb_all_nan_returns_empty() {
528 let x = vec![f64::NAN; 5];
529 let y = vec![f64::NAN; 5];
530 let indices = lttb(&x, &y, 3);
531 assert!(indices.is_empty());
532 }
533
534 #[test]
535 fn minmax_all_nan_returns_empty() {
536 let x = vec![f64::NAN; 5];
537 let y = vec![f64::NAN; 5];
538 let indices = minmax(&x, &y, 3);
539 assert!(indices.is_empty());
540 }
541
542 #[test]
543 fn lttb_indices_are_sorted() {
544 let n = 200;
545 let x: Vec<f64> = (0..n).map(|i| i as f64).collect();
546 let y: Vec<f64> = x.iter().map(|v| (v * 0.1).sin()).collect();
547 let indices = lttb(&x, &y, 20);
548 for w in indices.windows(2) {
549 assert!(w[0] < w[1], "LTTB indices must be strictly increasing");
550 }
551 }
552}