stream_vbyte/decode/
cursor.rs

1use crate::{
2    cumulative_encoded_len,
3    decode::{decode_num_scalar, DecodeQuadSink, Decoder, SliceDecodeSink, WriteQuadToSlice},
4    encoded_shape,
5    scalar::Scalar,
6    EncodedShape,
7};
8
9/// Offers more flexible decoding than the top-level `decode()`.
10///
11/// You can skip numbers you don't need with `skip()`, and decode the parts of your input you need
12/// with `decode_slice()`.
13///
14/// If you need maximum flexibility, you can use `decode_sink()` with a custom `DecodeQuadSink`
15/// implementation to receive numbers as they are decoded rather than storing them into a slice
16/// and then inspecting them.
17///
18/// # Decode sinks
19///
20/// If you don't want to write decoded numbers into a slice and inspect them later, you can use a
21/// custom sink. This is probably most useful when you want to minimize memory usage. For instance,
22/// you could `mmap` a file and scan through its contents with a custom sink without ever allocating
23/// on the heap.
24///
25/// There are two traits to represent a sink: one for complete quads
26/// (`DecodeQuadSink`), and one for any trailing leftover numbers that may not fill a quad
27/// (`DecodeSingleSink`). You will need to implement both with the appropriate
28/// `Decoder::DecodedQuad` type for the `Decoder` you are using. You can look at the implementations
29/// of `TupleSink` in the tests for examples.
30///
31/// # Examples
32///
33/// Here's a sink that calculates the maximum number in the input without writing the decoded input
34/// anywhere. (Unfortunately, due to a Rust bug I cannot include a SIMD example in a doc test.)
35///
36/// ```
37/// use std::cmp;
38///
39/// use rand::{Rng, seq::SliceRandom};
40///
41/// use stream_vbyte::{
42///     encode::encode,
43///     decode::{DecodeSingleSink, DecodeQuadSink, cursor::DecodeCursor},
44///     scalar::{Scalar, UnusedQuad},
45///     decode_quad_scalar
46/// };
47///
48/// struct MaxSink {
49///     max: u32
50/// }
51///
52/// impl MaxSink {
53///     fn new() -> MaxSink {
54///         MaxSink {
55///             max: 0
56///         }
57///     }
58/// }
59///
60/// // Update the max as each number is decoded
61/// impl DecodeSingleSink for MaxSink {
62///     fn on_number(&mut self, num: u32, _nums_decoded: usize) {
63///         self.max = cmp::max(self.max, num)
64///     }
65/// }
66///
67/// // DecodeQuadSink is not used for `Scalar` decoder, but the type system insists
68/// // on implementing it. This macro generates a stub impl.
69/// decode_quad_scalar!(MaxSink);
70/// // If this was using, say, the `Ssse3` decoder, would need a DecodeQuadSink impl to
71/// // find the max `u32` in a `__m128i`, perhaps with `_mm_max_epu32` (SSE 4.1).
72///
73/// fn main() {
74///     let mut nums = vec![1, 2, 3, 5, 8, 13, 21, 34];
75///
76///     // shuffle the numbers just so there's clearly nothing up our sleeve
77///     let mut rng = rand::thread_rng();
78///     nums.shuffle(&mut rng);
79///
80///     let mut encoded = vec![0; nums.len() * 5];
81///     encode::<Scalar>(&nums, &mut encoded);
82///
83///     let mut cursor = DecodeCursor::new(&encoded, nums.len());
84///     let mut sink = MaxSink::new();
85///     cursor.decode_sink::<Scalar, _>(&mut sink, nums.len());
86///
87///     assert_eq!(34, sink.max);
88/// }
89///
90/// ```
91///
92#[derive(Debug)]
93pub struct DecodeCursor<'a> {
94    control_bytes: &'a [u8],
95    encoded_nums: &'a [u8],
96    encoded_shape: EncodedShape,
97    total_nums: usize,
98    nums_decoded: usize,
99    control_bytes_read: usize,
100    encoded_bytes_read: usize,
101}
102
103impl<'a> DecodeCursor<'a> {
104    /// Create a new cursor.
105    pub fn new(input: &'a [u8], count: usize) -> DecodeCursor<'a> {
106        let shape = encoded_shape(count);
107
108        DecodeCursor {
109            control_bytes: &input[0..shape.control_bytes_len],
110            encoded_nums: &input[shape.control_bytes_len..],
111            encoded_shape: shape,
112            total_nums: count,
113            nums_decoded: 0,
114            control_bytes_read: 0,
115            encoded_bytes_read: 0,
116        }
117    }
118
119    /// Skip `to_skip` numbers. `to_skip` must be a multiple of 4, and must not be greater than the
120    /// count of remaining numbers that are in complete blocks of 4. In other words, if you have
121    /// 7 numbers remaining (a block of 4 and a partial block of 3), the only count you can skip is
122    /// 4.
123    ///
124    /// Skipping numbers is several times faster than decoding them.
125    pub fn skip(&mut self, to_skip: usize) {
126        assert_eq!(to_skip % 4, 0, "Must be a multiple of 4");
127        let control_bytes_to_skip = to_skip / 4;
128        assert!(
129            self.control_bytes_read + control_bytes_to_skip
130                <= self.encoded_shape.complete_control_bytes_len,
131            "Can't skip past the end of complete control bytes"
132        );
133
134        let slice_to_skip = &self.control_bytes
135            [self.control_bytes_read..(self.control_bytes_read + control_bytes_to_skip)];
136        let skipped_encoded_len = cumulative_encoded_len(&slice_to_skip);
137
138        self.control_bytes_read += control_bytes_to_skip;
139        self.encoded_bytes_read += skipped_encoded_len;
140        self.nums_decoded += to_skip;
141    }
142
143    /// Decode into the `output` buffer.
144    ///
145    /// If there is at least one complete quad of input remaining to decode, the buffer must be
146    /// at least of size 4.
147    ///
148    /// If there is only a final partial quad of input, the buffer must be at least as big as the
149    /// remaining input.
150    ///
151    /// Returns the number of numbers decoded by this invocation, which may be less than the size
152    /// of the buffer.
153    pub fn decode_slice<D: Decoder + WriteQuadToSlice>(&mut self, output: &mut [u32]) -> usize {
154        let output_len = output.len();
155
156        let mut sink = SliceDecodeSink::new(output);
157
158        self.decode_sink::<D, SliceDecodeSink>(&mut sink, output_len)
159    }
160
161    /// Decode at most `max_numbers_to_decode` numbers from the input and hand them to `sink`.
162    ///
163    /// Decoding is done one quad at a time, except for the last quad, which may have fewer than
164    /// four corresponding encoded numbers. Consequently, the number of numbers decoded will be a
165    /// multiple of 4, unless `max_numbers_to_decode` includes the end of the encoded input, in
166    /// which case the number of numbers will be all remaining numbers in the input regardless of
167    /// whether that's a multiple of 4 or not.
168    ///
169    /// With each invocation of `decode()`, the `nums_decoded` parameter used in
170    /// `DecodeQuadSink.on_quad()` will start counting up from 0 again.
171    ///
172    /// Returns the number of numbers decoded.
173    pub fn decode_sink<D, S>(&mut self, sink: &mut S, max_numbers_to_decode: usize) -> usize
174    where
175        D: Decoder,
176        // must support scalar decodes for leftover numbers
177        S: DecodeQuadSink<D> + DecodeQuadSink<Scalar>,
178    {
179        let start_nums_decoded = self.nums_decoded;
180        let mut complete_quad_nums_decoded_this_invocation;
181
182        let complete_control_bytes_to_decode = max_numbers_to_decode / 4;
183
184        {
185            // decode complete quads
186            let (primary_nums_decoded, primary_bytes_read) = D::decode_quads(
187                &self.control_bytes
188                    [self.control_bytes_read..self.encoded_shape.complete_control_bytes_len],
189                &self.encoded_nums[self.encoded_bytes_read..],
190                complete_control_bytes_to_decode,
191                0,
192                sink,
193            );
194
195            complete_quad_nums_decoded_this_invocation = primary_nums_decoded;
196            self.nums_decoded += primary_nums_decoded;
197            self.encoded_bytes_read += primary_bytes_read;
198            self.control_bytes_read += complete_quad_nums_decoded_this_invocation / 4;
199        }
200
201        {
202            // handle any remaining full quads if the provided Decoder did not consume all the
203            // control bytes
204            let (more_nums_decoded, more_bytes_read) = Scalar::decode_quads(
205                &self.control_bytes
206                    [self.control_bytes_read..self.encoded_shape.complete_control_bytes_len],
207                &self.encoded_nums[self.encoded_bytes_read..],
208                complete_control_bytes_to_decode - complete_quad_nums_decoded_this_invocation / 4,
209                complete_quad_nums_decoded_this_invocation,
210                sink,
211            );
212
213            complete_quad_nums_decoded_this_invocation += more_nums_decoded;
214            self.encoded_bytes_read += more_bytes_read;
215            self.control_bytes_read += more_nums_decoded / 4;
216            self.nums_decoded += more_nums_decoded;
217        }
218
219        // decode incomplete quad if we're at the end and we were asked to decode all leftovers
220        if max_numbers_to_decode - complete_quad_nums_decoded_this_invocation
221            >= self.encoded_shape.leftover_numbers
222            && self.control_bytes_read == self.encoded_shape.complete_control_bytes_len
223            && self.encoded_shape.leftover_numbers > 0
224            && self.nums_decoded < self.total_nums
225        {
226            let control_byte = self.control_bytes[self.encoded_shape.complete_control_bytes_len];
227
228            for i in 0..self.encoded_shape.leftover_numbers {
229                // first num's length in low 2 bits, last in high 2 bits
230                let bitmask = 0x03 << (i * 2);
231                let len = ((control_byte & bitmask) >> (i * 2)) as usize + 1;
232                sink.on_number(
233                    decode_num_scalar(len, &self.encoded_nums[self.encoded_bytes_read..]),
234                    complete_quad_nums_decoded_this_invocation + i,
235                );
236                self.nums_decoded += 1;
237                self.encoded_bytes_read += len;
238            }
239        }
240
241        self.nums_decoded - start_nums_decoded
242    }
243
244    /// Returns the total length of input scanned so far: the complete block of control bytes, plus
245    /// any encoded numbers decoded.
246    pub fn input_consumed(&self) -> usize {
247        self.encoded_shape.control_bytes_len + self.encoded_bytes_read
248    }
249
250    /// Returns true iff there are more numbers to be decoded.
251    pub fn has_more(&self) -> bool {
252        self.nums_decoded < self.total_nums
253    }
254}
255
256#[cfg(test)]
257mod tests {
258    use super::*;
259    use crate::encode::encode;
260
261    #[test]
262    #[should_panic(expected = "Must be a multiple of 4")]
263    fn skip_panics_on_not_multiple_of_4() {
264        DecodeCursor::new(&vec![], 0).skip(3)
265    }
266
267    #[test]
268    #[should_panic(expected = "Can't skip past the end of complete control bytes")]
269    fn skip_panics_on_exceeding_full_quads() {
270        let nums: Vec<u32> = (0..100).collect();
271        let mut encoded = Vec::new();
272        encoded.resize(nums.len() * 5, 0);
273
274        let encoded_len = encode::<Scalar>(&nums, &mut encoded);
275
276        DecodeCursor::new(&encoded[0..encoded_len], nums.len()).skip(104);
277    }
278
279    #[test]
280    fn skip_entire_enput_is_done() {
281        let nums: Vec<u32> = (0..100).collect();
282        let mut encoded = Vec::new();
283        encoded.resize(nums.len() * 5, 0);
284
285        let encoded_len = encode::<Scalar>(&nums, &mut encoded);
286        let mut cursor = DecodeCursor::new(&encoded[0..encoded_len], nums.len());
287
288        assert!(cursor.has_more());
289
290        cursor.skip(100);
291
292        assert!(!cursor.has_more());
293
294        let mut decoded: Vec<u32> = (0..100).map(|_| 0).collect();
295        // decoded has room...
296        assert_eq!(100, decoded.len());
297        // but nothing gets decoded into it
298        assert_eq!(0, cursor.decode_slice::<Scalar>(&mut decoded[..]))
299    }
300}