1use std::io::Read;
2
3use crate::{
4 vbyte::{Decoder8, Encoder8},
5 Kmer,
6};
7
8pub struct CompressedKmerList {
10 bytes: Vec<u8>,
11 count: usize,
12 last: Kmer,
13}
14
15impl CompressedKmerList {
16 pub fn new() -> CompressedKmerList {
18 CompressedKmerList {
19 bytes: Vec::new(),
20 count: 0,
21 last: Kmer(0),
22 }
23 }
24
25 pub fn len(&self) -> usize {
27 self.count
28 }
29
30 pub fn push(&mut self, x: Kmer) {
34 assert!(x >= self.last);
35 let d = x.0 - self.last.0;
36 Encoder8::encode(d, &mut self.bytes).unwrap();
37 self.count += 1;
38 self.last = x;
39 }
40
41 pub fn iter(&self) -> CompressedKmerListIterator<'_, impl Iterator<Item = &'_ u8>> {
43 CompressedKmerListIterator::new(self.bytes.iter())
44 }
45
46 pub fn from_iter<Src>(src: Src) -> CompressedKmerList
48 where
49 Src: Iterator<Item = Kmer>,
50 {
51 let mut res = CompressedKmerList::new();
52 for x in src {
53 res.push(x);
54 }
55 res
56 }
57}
58
59pub struct CompressedKmerListIterator<'a, Src>
61where
62 Src: Iterator<Item = &'a u8>,
63{
64 bytes: IteratorReadAdaptor<'a, Src>,
65 last: Kmer,
66}
67
68impl<'a, Src> CompressedKmerListIterator<'a, Src>
69where
70 Src: Iterator<Item = &'a u8>,
71{
72 pub fn new(bytes: Src) -> CompressedKmerListIterator<'a, Src> {
74 CompressedKmerListIterator {
75 bytes: IteratorReadAdaptor::new(bytes),
76 last: Kmer(0),
77 }
78 }
79}
80
81impl<'a, Src> Iterator for CompressedKmerListIterator<'a, Src>
82where
83 Src: Iterator<Item = &'a u8>,
84{
85 type Item = Kmer;
86 fn next(&mut self) -> Option<Self::Item> {
87 match Decoder8::decode(&mut self.bytes).unwrap() {
88 None => None,
89 Some(d) => {
90 self.last.0 += d;
91 Some(self.last.clone())
92 }
93 }
94 }
95}
96
97pub struct CompressedKmerFrequencyList {
99 bytes: Vec<u8>,
100 count: usize,
101 last: Kmer,
102}
103
104impl CompressedKmerFrequencyList {
105 pub fn new() -> CompressedKmerFrequencyList {
107 CompressedKmerFrequencyList {
108 bytes: Vec::new(),
109 count: 0,
110 last: Kmer(0),
111 }
112 }
113
114 pub fn len(&self) -> usize {
116 self.count
117 }
118
119 pub fn push(&mut self, x_f: (Kmer, usize)) {
123 let (x, f) = x_f;
124 assert!(x >= self.last);
125 let d = x.0 - self.last.0;
126 Encoder8::encode(d, &mut self.bytes).unwrap();
127 Encoder8::encode(f as u64, &mut self.bytes).unwrap();
128 self.count += 1;
129 self.last = x;
130 }
131
132 pub fn iter(&self) -> CompressedKmerFrequencyListIterator<'_, impl Iterator<Item = &'_ u8>> {
134 CompressedKmerFrequencyListIterator::new(self.bytes.iter())
135 }
136
137 pub fn from_iter<Src>(src: Src) -> CompressedKmerFrequencyList
139 where
140 Src: Iterator<Item = (Kmer, usize)>,
141 {
142 let mut res = CompressedKmerFrequencyList::new();
143 for x_f in src {
144 res.push(x_f);
145 }
146 res
147 }
148}
149
150pub struct CompressedKmerFrequencyListIterator<'a, Src>
152where
153 Src: Iterator<Item = &'a u8>,
154{
155 bytes: IteratorReadAdaptor<'a, Src>,
156 last: Kmer,
157}
158
159impl<'a, Src> CompressedKmerFrequencyListIterator<'a, Src>
160where
161 Src: Iterator<Item = &'a u8>,
162{
163 pub fn new(bytes: Src) -> CompressedKmerFrequencyListIterator<'a, Src> {
165 CompressedKmerFrequencyListIterator {
166 bytes: IteratorReadAdaptor::new(bytes),
167 last: Kmer(0),
168 }
169 }
170}
171
172impl<'a, Src> Iterator for CompressedKmerFrequencyListIterator<'a, Src>
173where
174 Src: Iterator<Item = &'a u8>,
175{
176 type Item = (Kmer, usize);
177 fn next(&mut self) -> Option<Self::Item> {
178 match Decoder8::decode(&mut self.bytes).unwrap() {
179 None => None,
180 Some(d) => {
181 self.last.0 += d;
182 let f = Decoder8::decode(&mut self.bytes).unwrap().unwrap();
183 Some((self.last.clone(), f as usize))
184 }
185 }
186 }
187}
188
189pub struct IteratorReadAdaptor<'a, Src>
191where
192 Src: Iterator<Item = &'a u8>,
193{
194 itr: Src,
195}
196
197impl<'a, Src> IteratorReadAdaptor<'a, Src>
198where
199 Src: Iterator<Item = &'a u8>,
200{
201 pub fn new(src: Src) -> IteratorReadAdaptor<'a, Src> {
203 IteratorReadAdaptor { itr: src }
204 }
205}
206
207impl<'a, Src> Read for IteratorReadAdaptor<'a, Src>
208where
209 Src: Iterator<Item = &'a u8>,
210{
211 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
212 if buf.len() == 0 {
213 return Ok(0);
214 }
215
216 let mut count = 0;
217 while let Some(b) = self.itr.next() {
218 buf[count] = *b;
219 count += 1;
220 if count == buf.len() {
221 break;
222 }
223 }
224 Ok(count)
225 }
226}
227
228#[cfg(test)]
229mod tests {
230 use super::*;
231
232 struct MiniRng {
233 x: u64,
234 }
235
236 impl MiniRng {
237 fn new(seed: u64) -> MiniRng {
238 MiniRng { x: seed }
239 }
240
241 fn rnd(&mut self) -> u64 {
242 self.x = self.x.wrapping_mul(2862933555777941757u64);
243 self.x = self.x.wrapping_add(3037000493u64);
244 self.x
245 }
246
247 fn freq(&mut self) -> u64 {
248 let mut f: f64 = 0.0;
249 let mut u = self.rnd();
250 while u > 0 {
251 if u & 1 == 1 {
252 f += 1.0;
253 }
254 f /= 2.0;
255 u >>= 1;
256 }
257 (1.0 / f).floor() as u64
258 }
259 }
260
261 #[test]
262 fn test_1() {
263 let k = 7;
264 let m = (1u64 << (2 * k)) - 1;
265 let mut rng = MiniRng::new(19);
266 let mut xs = Vec::new();
267 for _i in 0..100 {
268 let x = Kmer(rng.rnd() & m);
269 xs.push(x);
270 }
271 xs.sort();
272 let c = CompressedKmerList::from_iter(xs.iter().map(|x| x.clone()));
273 assert_eq!(c.len(), xs.len());
274 let ys = Vec::from_iter(c.iter());
275 assert_eq!(ys, xs);
276 }
277
278 #[test]
279 fn test_2() {
280 let k = 7;
281 let m = (1u64 << (2 * k)) - 1;
282 let mut rng = MiniRng::new(19);
283 let mut xs = Vec::new();
284 for _i in 0..100 {
285 let x = Kmer(rng.rnd() & m);
286 let f = rng.freq();
287 xs.push((x, f as usize));
288 }
289 xs.sort();
290 let c = CompressedKmerFrequencyList::from_iter(xs.iter().map(|(x,f)| (x.clone(), *f)));
291 assert_eq!(c.len(), xs.len());
292 let ys = Vec::from_iter(c.iter());
293 assert_eq!(ys, xs);
294 }}