1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//! Samplers are given a series of data, and write some of them
use crate::prelude::*;
// data[0..size] holds the data, which is
// if jump == 1, contents are just the lines seen so far.
// else it's first line, stuff, last line
// stuff is every "jump" lines starting with line "jump"
/// Sample N evenly distributed items from an input stream
#[derive(Debug)]
pub struct Smooth {
small: usize, // goal size
large: usize, // maximum size
data: Vec<Vec<u8>>, // always length large
jump: usize, // number of lines between entries
pos: usize, // number of lines between last and penultimate entries in data
size: usize, // current number of 'live' elements in data
}
impl Smooth {
/// keep best sz items
#[must_use]
pub fn new(sz: usize) -> Self {
let small = sz.max(2);
let large = small * 2 - 2;
let mut data = Vec::with_capacity(large);
for _ in 0..large {
data.push(Vec::new());
}
Self { small, large, data, jump: 1, pos: 0, size: 0 }
}
fn verify(&self) -> bool {
debug_assert!(self.data.len() == self.large);
debug_assert!(self.size <= self.large);
debug_assert!(self.size <= self.large);
// debug_assert!(self.pos <= self.jump);
true
}
/// add one more item
pub fn add(&mut self, x: &[u8]) {
debug_assert!(self.verify());
if self.pos == 0 {
if self.size >= self.large {
self.jump *= 2;
self.collapse();
}
self.data[self.size].clear();
self.data[self.size].extend(x);
self.size += 1;
if self.jump > 1 {
self.pos += 1;
}
} else {
self.data[self.size - 1].clear();
self.data[self.size - 1].extend(x);
self.pos += 1;
debug_assert!(self.verify());
let limit = if self.size == self.large { 2 * self.jump } else { self.jump };
if self.pos >= limit {
self.pos = 0;
}
}
debug_assert!(self.verify());
}
// crunch down to self.small elements
// maintain first and last, spread the rest evenly.
#[allow(clippy::cast_precision_loss)]
fn collapse(&mut self) {
debug_assert!(self.verify());
let old_size = self.size;
let new_size = self.small;
if old_size <= new_size {
return;
}
if new_size == 2 {
self.data.swap(1, self.size - 1);
self.size = 2;
return;
}
let new_end = new_size - 1;
let ratio = (old_size - 2) as f64 / (new_size - 2) as f64;
for i in 1..new_end {
let pos = ((i as f64) * ratio) as usize;
self.data.swap(i, pos);
}
self.data.swap(new_end, self.size - 1);
self.size = new_size;
}
/// finish and write
pub fn finalize(&mut self, w: &mut impl Write) -> Result<()> {
self.collapse();
self.write(w)
}
/// write the lines
pub fn write(&self, w: &mut impl Write) -> Result<()> {
for i in 0..self.size {
w.write_all(&self.data[i])?;
}
Ok(())
}
/*
fn dump(&self) {
for i in 0..self.size {
prerr_n(&[&self.data[i]]);
}
}
*/
}