1use std::io::Read;
2
3use crate::error::Error;
4
5#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
6enum EncodingTypeV2 {
7 ShortRepeat,
8 Direct,
9 PatchedBase,
10 Delta,
11}
12
13fn header_to_rle_v2_short_repeated_width(header: u8) -> u8 {
14 (header & 0b00111000) >> 3
15}
16
17fn header_to_rle_v2_short_repeated_count(header: u8) -> u8 {
18 header & 0b00000111
19}
20
21fn rle_v2_direct_bit_width(value: u8) -> u8 {
22 match value {
23 0 => 1,
24 1 => 2,
25 3 => 4,
26 7 => 8,
27 15 => 16,
28 23 => 24,
29 27 => 32,
30 28 => 40,
31 29 => 48,
32 30 => 56,
33 31 => 64,
34 other => todo!("{other}"),
35 }
36}
37
38fn header_to_rle_v2_direct_bit_width(header: u8) -> u8 {
39 let bit_width = (header & 0b00111110) >> 1;
40 rle_v2_direct_bit_width(bit_width)
41}
42
43fn rle_v2_delta_bit_width(value: u8) -> u8 {
44 match value {
45 0 => 0,
46 1 => 2,
47 3 => 4,
48 7 => 8,
49 15 => 16,
50 23 => 24,
51 27 => 32,
52 28 => 40,
53 29 => 48,
54 30 => 56,
55 31 => 64,
56 other => todo!("{other}"),
57 }
58}
59
60fn header_to_rle_v2_delta_bit_width(header: u8) -> u8 {
61 let bit_width = (header & 0b00111110) >> 1;
62 rle_v2_delta_bit_width(bit_width)
63}
64
65fn header_to_rle_v2_direct_length(header: u8, header1: u8) -> u16 {
66 let bit = header & 0b00000001;
67 let r = u16::from_be_bytes([bit, header1]);
68 1 + r
69}
70
71fn unsigned_varint<R: Read>(reader: &mut R) -> Result<u64, Error> {
72 let mut i = 0u64;
73 let mut buf = [0u8; 1];
74 let mut j = 0;
75 loop {
76 if j > 9 {
77 return Err(Error::OutOfSpec);
79 }
80 reader.read_exact(&mut buf[..])?;
81 i |= (u64::from(buf[0] & 0x7F)) << (j * 7);
82 if (buf[0] >> 7) == 0 {
83 break;
84 } else {
85 j += 1;
86 }
87 }
88 Ok(i)
89}
90
91#[inline]
92fn zigzag(z: u64) -> i64 {
93 if z & 0x1 == 0 {
94 (z >> 1) as i64
95 } else {
96 !(z >> 1) as i64
97 }
98}
99
100fn signed_varint<R: Read>(reader: &mut R) -> Result<i64, Error> {
101 unsigned_varint(reader).map(zigzag)
102}
103
104#[inline]
105fn unpack(bytes: &[u8], num_bits: u8, index: usize) -> u64 {
106 if num_bits == 0 {
107 return 0;
108 };
109 let num_bits = num_bits as usize;
110 let start = num_bits * index; let length = num_bits; let byte_start = start / 8;
113 let byte_end = (start + length + 7) / 8;
114 let slice = &bytes[byte_start..byte_end];
116 let mut a = [0u8; 8];
117 for (i, item) in slice.iter().rev().enumerate() {
118 a[i] = *item;
119 }
120 let bits = u64::from_le_bytes(a);
121 let offset = (slice.len() * 8 - num_bits) % 8 - start % 8;
122 (bits >> offset) & (!0u64 >> (64 - num_bits))
123}
124
125#[derive(Debug)]
126pub struct UnsignedDirectRun {
127 data: Vec<u8>,
128 bit_width: u8,
129 index: usize,
130 length: usize,
131}
132
133impl UnsignedDirectRun {
134 #[inline]
135 pub fn try_new<R: Read>(
136 header: u8,
137 reader: &mut R,
138 mut scratch: Vec<u8>,
139 ) -> Result<Self, Error> {
140 let mut header1 = [0u8];
141 reader.read_exact(&mut header1)?;
142 let bit_width = header_to_rle_v2_direct_bit_width(header);
143
144 let length = header_to_rle_v2_direct_length(header, header1[0]);
145
146 let additional = ((bit_width as usize) * (length as usize) + 7) / 8;
147 scratch.clear();
148 scratch.reserve(additional);
149 reader.take(additional as u64).read_to_end(&mut scratch)?;
150
151 Ok(Self {
152 data: scratch,
153 bit_width,
154 index: 0,
155 length: length as usize,
156 })
157 }
158
159 #[inline]
160 pub fn len(&self) -> usize {
161 self.length - self.index
162 }
163}
164
165impl Iterator for UnsignedDirectRun {
166 type Item = u64;
167
168 #[inline]
169 fn next(&mut self) -> Option<Self::Item> {
170 (self.index != self.length).then(|| {
171 let index = self.index;
172 self.index += 1;
173 unpack(&self.data, self.bit_width, index)
174 })
175 }
176
177 #[inline]
178 fn size_hint(&self) -> (usize, Option<usize>) {
179 let remaining = self.len();
180 (remaining, Some(remaining))
181 }
182}
183
184pub struct UnsignedDeltaRun {
185 encoded_deltas: Vec<u8>,
186 bit_width: u8,
187 index: usize,
188 length: usize,
189 base: u64,
190 delta_base: i64,
191}
192
193impl UnsignedDeltaRun {
194 #[inline]
195 pub fn try_new<R: Read>(
196 header: u8,
197 reader: &mut R,
198 mut scratch: Vec<u8>,
199 ) -> Result<Self, Error> {
200 let mut header1 = [0u8];
201 reader.read_exact(&mut header1)?;
202 let bit_width = header_to_rle_v2_delta_bit_width(header);
203
204 let length = header_to_rle_v2_direct_length(header, header1[0]);
205
206 let base = unsigned_varint(reader)?;
207 let delta_base = signed_varint(reader)?;
208 let additional = ((length as usize - 2) * bit_width as usize + 7) / 8;
209
210 scratch.clear();
211 scratch.reserve(additional);
212 reader.take(additional as u64).read_to_end(&mut scratch)?;
213
214 Ok(Self {
215 base,
216 encoded_deltas: scratch,
217 bit_width,
218 index: 0,
219 length: length as usize,
220 delta_base,
221 })
222 }
223
224 #[inline]
225 pub fn len(&self) -> usize {
226 self.length - self.index
227 }
228
229 #[inline]
230 pub fn into_inner(mut self) -> Vec<u8> {
231 self.encoded_deltas.clear();
232 self.encoded_deltas
233 }
234}
235
236impl Iterator for UnsignedDeltaRun {
237 type Item = u64;
238
239 #[inline]
240 fn next(&mut self) -> Option<Self::Item> {
241 (self.index != self.length).then(|| {
242 let index = self.index;
243 if index == 0 {
244 self.index += 1;
245 return self.base;
246 }
247 if index == 1 || self.bit_width == 0 {
248 self.index += 1;
249 if self.delta_base > 0 {
250 self.base += self.delta_base as u64;
251 } else {
252 self.base -= (-self.delta_base) as u64;
253 }
254 return self.base;
255 }
256 self.index += 1;
257 let delta = unpack(&self.encoded_deltas, self.bit_width, index - 2);
258 if self.delta_base > 0 {
259 self.base += delta;
260 } else {
261 self.base -= delta;
262 }
263 self.base
264 })
265 }
266
267 #[inline]
268 fn size_hint(&self) -> (usize, Option<usize>) {
269 let remaining = self.len();
270 (remaining, Some(remaining))
271 }
272}
273
274#[derive(Debug)]
275pub struct UnsignedShortRepeat {
276 value: u64,
277 remaining: usize,
278 scratch: Vec<u8>,
279}
280
281impl UnsignedShortRepeat {
282 #[inline]
283 fn try_new<R: Read>(header: u8, reader: &mut R, mut scratch: Vec<u8>) -> Result<Self, Error> {
284 let width = 1 + header_to_rle_v2_short_repeated_width(header);
285 let count = 3 + header_to_rle_v2_short_repeated_count(header);
286
287 scratch.clear();
288 scratch.reserve(width as usize);
289 reader.take(width as u64).read_to_end(&mut scratch)?;
290
291 let mut a = [0u8; 8];
292 a[8 - scratch.len()..].copy_from_slice(&scratch);
293 let value = u64::from_be_bytes(a);
294 scratch.clear();
295
296 Ok(Self {
297 value,
298 remaining: count as usize,
299 scratch,
300 })
301 }
302
303 #[inline]
304 pub fn len(&self) -> usize {
305 self.remaining
306 }
307
308 #[inline]
309 pub fn into_inner(self) -> Vec<u8> {
310 self.scratch
311 }
312}
313
314impl Iterator for UnsignedShortRepeat {
315 type Item = u64;
316
317 #[inline]
318 fn next(&mut self) -> Option<Self::Item> {
319 (self.remaining != 0).then(|| {
320 self.remaining -= 1;
321 self.value
322 })
323 }
324
325 #[inline]
326 fn size_hint(&self) -> (usize, Option<usize>) {
327 (self.len(), Some(self.len()))
328 }
329}
330
331#[derive(Debug)]
332pub struct SignedDeltaRun {
333 encoded_deltas: Vec<u8>,
334 bit_width: u8,
335 index: usize,
336 length: usize,
337 base: i64,
338 delta_base: i64,
339}
340
341impl SignedDeltaRun {
342 #[inline]
343 fn try_new<R: Read>(header: u8, reader: &mut R, mut scratch: Vec<u8>) -> Result<Self, Error> {
344 let mut header1 = [0u8];
345 reader.read_exact(&mut header1)?;
346 let bit_width = header_to_rle_v2_delta_bit_width(header);
347
348 let length = header_to_rle_v2_direct_length(header, header1[0]);
349
350 let base = unsigned_varint(reader).map(zigzag)?;
351 let delta_base = signed_varint(reader)?;
352 let additional = ((length as usize - 2) * bit_width as usize + 7) / 8;
353
354 scratch.clear();
355 scratch.reserve(additional);
356 reader.take(additional as u64).read_to_end(&mut scratch)?;
357
358 Ok(Self {
359 base,
360 encoded_deltas: scratch,
361 bit_width,
362 index: 0,
363 length: length as usize,
364 delta_base,
365 })
366 }
367
368 pub fn len(&self) -> usize {
369 self.length - self.index
370 }
371
372 #[must_use]
373 pub fn is_empty(&self) -> bool {
374 self.len() == 0
375 }
376}
377
378impl Iterator for SignedDeltaRun {
379 type Item = i64;
380
381 #[inline]
382 fn next(&mut self) -> Option<Self::Item> {
383 (self.index != self.length).then(|| {
384 let index = self.index;
385 if index == 0 {
386 self.index += 1;
387 return self.base;
388 }
389 if index == 1 || self.bit_width == 0 {
390 self.index += 1;
391 if self.delta_base > 0 {
392 self.base += self.delta_base as i64;
393 } else {
394 self.base -= (-self.delta_base) as i64;
395 }
396 return self.base;
397 }
398 self.index += 1;
399 let delta = unpack(&self.encoded_deltas, self.bit_width, index - 2);
401 if self.delta_base > 0 {
402 self.base += delta as i64;
403 } else {
404 self.base -= delta as i64;
405 }
406 self.base
407 })
408 }
409
410 #[inline]
411 fn size_hint(&self) -> (usize, Option<usize>) {
412 let remaining = self.length - self.index;
413 (remaining, Some(remaining))
414 }
415}
416
417#[inline]
418fn run_encoding(header: u8) -> EncodingTypeV2 {
419 match (header & 128 == 128, header & 64 == 64) {
420 (true, true) => EncodingTypeV2::Delta,
422 (true, false) => EncodingTypeV2::PatchedBase,
424 (false, true) => EncodingTypeV2::Direct,
426 (false, false) => EncodingTypeV2::ShortRepeat,
428 }
429}
430
431pub enum UnsignedRleV2Run {
433 Direct(UnsignedDirectRun),
435 Delta(UnsignedDeltaRun),
437 ShortRepeat(UnsignedShortRepeat),
439}
440
441impl UnsignedRleV2Run {
442 pub fn try_new<R: Read>(reader: &mut R, scratch: Vec<u8>) -> Result<Self, Error> {
444 let mut header = [0u8];
445 reader.read_exact(&mut header)?;
446 let header = header[0];
447 let encoding = run_encoding(header);
448
449 match encoding {
450 EncodingTypeV2::Direct => {
451 UnsignedDirectRun::try_new(header, reader, scratch).map(Self::Direct)
452 }
453 EncodingTypeV2::Delta => {
454 UnsignedDeltaRun::try_new(header, reader, scratch).map(Self::Delta)
455 }
456 EncodingTypeV2::ShortRepeat => {
457 UnsignedShortRepeat::try_new(header, reader, scratch).map(Self::ShortRepeat)
458 }
459 other => todo!("{other:?}"),
460 }
461 }
462
463 pub fn len(&self) -> usize {
465 match self {
466 Self::Direct(run) => run.len(),
467 Self::Delta(run) => run.len(),
468 Self::ShortRepeat(run) => run.len(),
469 }
470 }
471
472 #[must_use]
474 pub fn is_empty(&self) -> bool {
475 self.len() == 0
476 }
477}
478
479pub struct UnsignedRleV2RunIter<R: Read> {
481 reader: R,
482 scratch: Vec<u8>,
483 length: usize,
484}
485
486impl<R: Read> UnsignedRleV2RunIter<R> {
487 pub fn new(reader: R, length: usize, scratch: Vec<u8>) -> Self {
489 Self {
490 reader,
491 scratch,
492 length,
493 }
494 }
495
496 pub fn into_inner(mut self) -> (R, Vec<u8>) {
498 self.scratch.clear();
499 (self.reader, self.scratch)
500 }
501}
502
503impl<R: Read> Iterator for UnsignedRleV2RunIter<R> {
504 type Item = Result<UnsignedRleV2Run, Error>;
505
506 fn next(&mut self) -> Option<Self::Item> {
507 (self.length != 0).then(|| {
508 let run =
509 UnsignedRleV2Run::try_new(&mut self.reader, std::mem::take(&mut self.scratch))?;
510 self.length -= run.len();
511 Ok(run)
512 })
513 }
514}
515
516pub struct UnsignedRleV2Iter<R: Read> {
518 current: Option<UnsignedRleV2Run>,
519 runs: UnsignedRleV2RunIter<R>,
520}
521
522impl<R: Read> UnsignedRleV2Iter<R> {
523 pub fn new(reader: R, length: usize, scratch: Vec<u8>) -> Self {
525 Self {
526 runs: UnsignedRleV2RunIter::new(reader, length, scratch),
527 current: None,
528 }
529 }
530
531 pub fn into_inner(self) -> (R, Vec<u8>) {
533 self.runs.into_inner()
534 }
535}
536
537impl<R: Read> Iterator for UnsignedRleV2Iter<R> {
538 type Item = Result<u64, Error>;
539
540 #[inline]
541 fn next(&mut self) -> Option<Self::Item> {
542 let next = if let Some(run) = &mut self.current {
543 match run {
544 UnsignedRleV2Run::Direct(values_iter) => values_iter.next(),
545 UnsignedRleV2Run::Delta(values_iter) => values_iter.next(),
546 UnsignedRleV2Run::ShortRepeat(values_iter) => values_iter.next(),
547 }
548 } else {
549 None
550 };
551
552 if next.is_none() {
553 match self.runs.next()? {
554 Ok(run) => self.current = Some(run),
555 Err(e) => return Some(Err(e)),
556 }
557 self.next()
558 } else {
559 next.map(Ok)
560 }
561 }
562}
563
564#[derive(Debug)]
565pub struct SignedDirectRun(UnsignedDirectRun);
566
567impl SignedDirectRun {
568 pub fn try_new<R: Read>(header: u8, reader: &mut R, scratch: Vec<u8>) -> Result<Self, Error> {
569 UnsignedDirectRun::try_new(header, reader, scratch).map(Self)
570 }
571
572 pub fn len(&self) -> usize {
573 self.0.len()
574 }
575
576 #[must_use]
578 pub fn is_empty(&self) -> bool {
579 self.len() == 0
580 }
581}
582
583impl Iterator for SignedDirectRun {
584 type Item = i64;
585
586 fn next(&mut self) -> Option<Self::Item> {
587 self.0.next().map(zigzag)
588 }
589
590 fn size_hint(&self) -> (usize, Option<usize>) {
591 self.0.size_hint()
592 }
593}
594
595#[derive(Debug)]
596pub struct SignedShortRepeat(UnsignedShortRepeat);
597
598impl SignedShortRepeat {
599 pub fn try_new<R: Read>(header: u8, reader: &mut R, scratch: Vec<u8>) -> Result<Self, Error> {
600 UnsignedShortRepeat::try_new(header, reader, scratch).map(Self)
601 }
602
603 pub fn len(&self) -> usize {
605 self.0.len()
606 }
607
608 #[must_use]
610 pub fn is_empty(&self) -> bool {
611 self.len() == 0
612 }
613}
614
615impl Iterator for SignedShortRepeat {
616 type Item = i64;
617
618 fn next(&mut self) -> Option<Self::Item> {
619 self.0.next().map(zigzag)
620 }
621
622 fn size_hint(&self) -> (usize, Option<usize>) {
623 self.0.size_hint()
624 }
625}
626
627#[derive(Debug)]
629pub enum SignedRleV2Run {
630 Direct(SignedDirectRun),
632 Delta(SignedDeltaRun),
634 ShortRepeat(SignedShortRepeat),
636}
637
638impl SignedRleV2Run {
639 pub fn try_new<R: Read>(reader: &mut R, scratch: Vec<u8>) -> Result<Self, Error> {
641 let mut header = [0u8];
642 reader.read_exact(&mut header)?;
643 let header = header[0];
644 let encoding = run_encoding(header);
645
646 match encoding {
647 EncodingTypeV2::Direct => {
648 SignedDirectRun::try_new(header, reader, scratch).map(Self::Direct)
649 }
650 EncodingTypeV2::Delta => {
651 SignedDeltaRun::try_new(header, reader, scratch).map(Self::Delta)
652 }
653 EncodingTypeV2::ShortRepeat => {
654 SignedShortRepeat::try_new(header, reader, scratch).map(Self::ShortRepeat)
655 }
656 other => todo!("{other:?}"),
657 }
658 }
659
660 pub fn len(&self) -> usize {
662 match self {
663 Self::Direct(run) => run.len(),
664 Self::Delta(run) => run.len(),
665 Self::ShortRepeat(run) => run.len(),
666 }
667 }
668
669 #[must_use]
671 pub fn is_empty(&self) -> bool {
672 self.len() == 0
673 }
674}
675
676pub struct SignedRleV2RunIter<R: Read> {
678 reader: R,
679 scratch: Vec<u8>,
680 length: usize,
681}
682
683impl<R: Read> SignedRleV2RunIter<R> {
684 pub fn new(reader: R, length: usize, scratch: Vec<u8>) -> Self {
686 Self {
687 reader,
688 scratch,
689 length,
690 }
691 }
692
693 pub fn into_inner(mut self) -> (R, Vec<u8>) {
694 self.scratch.clear();
695 (self.reader, self.scratch)
696 }
697}
698
699impl<R: Read> Iterator for SignedRleV2RunIter<R> {
700 type Item = Result<SignedRleV2Run, Error>;
701
702 #[inline]
703 fn next(&mut self) -> Option<Self::Item> {
704 (self.length != 0).then(|| {
705 let run = SignedRleV2Run::try_new(&mut self.reader, std::mem::take(&mut self.scratch))?;
706 self.length -= run.len();
707 Ok(run)
708 })
709 }
710}
711
712pub struct SignedRleV2Iter<R: Read> {
714 current: Option<SignedRleV2Run>,
715 runs: SignedRleV2RunIter<R>,
716}
717
718impl<R: Read> SignedRleV2Iter<R> {
719 pub fn new(reader: R, length: usize, scratch: Vec<u8>) -> Self {
721 Self {
722 runs: SignedRleV2RunIter::new(reader, length, scratch),
723 current: None,
724 }
725 }
726
727 pub fn into_inner(self) -> (R, Vec<u8>) {
729 self.runs.into_inner()
730 }
731}
732
733impl<R: Read> Iterator for SignedRleV2Iter<R> {
734 type Item = Result<i64, Error>;
735
736 #[inline]
737 fn next(&mut self) -> Option<Self::Item> {
738 let next = if let Some(run) = &mut self.current {
739 match run {
740 SignedRleV2Run::Direct(values_iter) => values_iter.next(),
741 SignedRleV2Run::Delta(values_iter) => values_iter.next(),
742 SignedRleV2Run::ShortRepeat(values_iter) => values_iter.next(),
743 }
744 } else {
745 None
746 };
747
748 if next.is_none() {
749 match self.runs.next()? {
750 Ok(run) => self.current = Some(run),
751 Err(e) => return Some(Err(e)),
752 }
753 self.next()
754 } else {
755 next.map(Ok)
756 }
757 }
758}
759
760#[cfg(test)]
761mod test {
762 use super::*;
763
764 #[test]
765 fn test_zigzag() {
766 assert_eq!(zigzag(2), 1);
767 assert_eq!(zigzag(4), 2);
768 }
769
770 #[test]
771 fn unpacking() {
772 let bytes = [0b01000000u8];
773 assert_eq!(unpack(&bytes, 2, 0), 1);
774 assert_eq!(unpack(&bytes, 2, 1), 0);
775 }
776
777 #[test]
778 fn short_repeat() {
779 let data: [u8; 3] = [0x0a, 0x27, 0x10];
781
782 let a = UnsignedShortRepeat::try_new(data[0], &mut &data[1..], vec![])
783 .unwrap()
784 .collect::<Vec<_>>();
785 assert_eq!(a, vec![10000, 10000, 10000, 10000, 10000]);
786 }
787
788 #[test]
789 fn direct() {
790 let data: [u8; 10] = [0x5e, 0x03, 0x5c, 0xa1, 0xab, 0x1e, 0xde, 0xad, 0xbe, 0xef];
792
793 let data = &mut data.as_ref();
794
795 let a = UnsignedDirectRun::try_new(data[0], &mut &data[1..], vec![])
796 .unwrap()
797 .collect::<Vec<_>>();
798 assert_eq!(a, vec![23713, 43806, 57005, 48879]);
799 }
800
801 #[test]
802 fn delta() {
803 let data: [u8; 8] = [0xc6, 0x09, 0x02, 0x02, 0x22, 0x42, 0x42, 0x46];
808
809 let data = &mut data.as_ref();
810
811 let a = UnsignedDeltaRun::try_new(data[0], &mut &data[1..], vec![])
812 .unwrap()
813 .collect::<Vec<_>>();
814 assert_eq!(a, vec![2, 3, 5, 7, 11, 13, 17, 19, 23, 29]);
815 }
816}