// llvm_bitcursor/lib.rs

1//! `llvm-bitcursor` is a no-frills library for reading unaligned fields from a bitstream.
2//! The APIs provided by this crate are specialized for internal use in an LLVM bitstream parser,
3//! and may be less useful outside of that context.
4
5#![deny(rustdoc::broken_intra_doc_links)]
6#![deny(missing_docs)]
7#![allow(clippy::redundant_field_names)]
8#![forbid(unsafe_code)]
9
10pub mod error;
11
12use std::io;
13
14use num::NumCast;
15
16use crate::error::Error;
17
18// Stupid hack trait to give `NumCast` implementations a bitsize.
19trait Bitsize {
20    fn bitsize() -> usize;
21}
22
23impl<T: NumCast> Bitsize for T {
24    fn bitsize() -> usize {
25        std::mem::size_of::<T>() * 8
26    }
27}
28
/// A no-copy cursor wrapper for a bitstream.
///
/// Any type that implements `AsRef<[u8]>` can be used with `BitCursor`.
///
/// Internally, the cursor caches up to one `u64`-sized block of the
/// underlying buffer and serves bit-granular reads from that cache.
#[derive(Debug)]
pub struct BitCursor<T: AsRef<[u8]>> {
    /// The cursor-accessible length of the buffer. This is normally the same
    /// as the buffer's length, but can be shorter for uses where `inner`
    /// is a multi-purpose buffer.
    byte_len: usize,

    /// Our inner buffer.
    inner: T,

    /// Our current byte index in `inner`, which may be ahead of our
    /// current bit position (if `current_block` is not exhausted).
    byte_pos: usize,

    /// The last `u64`-sized block read from `inner`.
    /// Bits are consumed from the low end (LSB first).
    current_block: u64,

    /// The number of bits in `current_block` that are valid (i.e., not
    /// yet consumed).
    bit_index: usize,
}
53
impl<T: AsRef<[u8]>> BitCursor<T> {
    // The size of the internal cache block, in bytes.
    const BLOCK_SIZE: usize = std::mem::size_of::<u64>();
    // The size of the internal cache block, in bits.
    const BLOCK_SIZE_BITS: usize = u64::BITS as usize;
    // Like LLVM itself, we don't support VBR widths above 32 bits.
    const MAX_VBR_BITS: usize = 32;

    /// Create a new `BitCursor` for the `inner` buffer.
    pub fn new(inner: T) -> Self {
        Self {
            byte_len: inner.as_ref().len(),
            inner: inner,
            byte_pos: 0,
            current_block: 0,
            bit_index: 0,
        }
    }

    /// Create a new `BitCursor` for the `inner` buffer, limiting to `byte_len` bytes.
    ///
    /// Returns an error if `byte_len` exceeds `inner`'s range.
    pub fn new_with_len(inner: T, byte_len: usize) -> Result<Self, Error> {
        if byte_len > inner.as_ref().len() {
            return Err(Error::InvalidLength);
        }

        Ok(Self {
            byte_len: byte_len,
            inner: inner,
            byte_pos: 0,
            current_block: 0,
            bit_index: 0,
        })
    }

    /// Return the length of the data wrapped by this cursor, in bytes.
    pub fn byte_len(&self) -> usize {
        self.byte_len
    }

    /// Return the length of the data wrapped by this cursor, in bits.
    pub fn bit_len(&self) -> usize {
        self.byte_len() * 8
    }

    /// Return the current position in the data, at bit granularity.
    ///
    /// `byte_pos` counts bytes already pulled into `current_block`, so the
    /// true position is that total minus the `bit_index` bits still buffered.
    pub fn tell_bit(&self) -> usize {
        (self.byte_pos * 8) - self.bit_index
    }

    /// Return the current position in the data, at byte granularity.
    pub fn tell_byte(&self) -> usize {
        self.tell_bit() / 8
    }

    /// Return whether the underlying data is "exhausted", i.e. whether it's
    /// impossible to read any further from the cursor's current position.
    pub fn exhausted(&self) -> bool {
        // Exhausted means: no buffered bits left *and* no bytes left to load.
        self.bit_index == 0 && self.byte_len() <= self.byte_pos
    }

    /// Seek to the given bit-granular position in the bitstream.
    ///
    /// NOTE: This is a bit-granular absolute seek. If you only need byte granularity
    /// or would like to do a relative (start or end) seek, use the [`Seek`](std::io::Seek)
    /// implementation.
    pub fn seek_bit(&mut self, pos: usize) -> Result<(), Error> {
        log::debug!("seek_bit: seeking to {}", pos);

        // Get the byte corresponding to this bit.
        // We round down to the nearest block boundary so that the subsequent
        // `read` below re-establishes a block-aligned internal state.
        let byte_pos = (pos / 8) & !(Self::BLOCK_SIZE - 1);

        if byte_pos > self.byte_len() {
            return Err(Error::Eof);
        }

        // Change our position, and clear any internal block state.
        self.byte_pos = byte_pos;
        self.clear_block_state();

        // Finally, we need to bring our internal block state into sync
        // with our bit position by consuming any bits at the current
        // word before our new position.
        // NOTE(ww): LLVM's BitstreamReader prefers the equivalent of
        // `pos & (usize::BITS - 1)`, presumably to avoid a modulo operation.
        // But (experimentally) LLVM is more than smart enough to optimize
        // this down to a single AND, so I used the modulo version here for
        // clarity.
        let bits_to_consume = pos % Self::BLOCK_SIZE_BITS;
        log::debug!("bits_to_consume={}", bits_to_consume);
        if bits_to_consume > 0 {
            self.read(bits_to_consume)?;
        }

        Ok(())
    }

    /// Clear our internal block state.
    ///
    /// This should be called as part of any operation that modifies the cursor's
    /// position within the bitstream, as any change in position invalidates the
    /// block.
    fn clear_block_state(&mut self) {
        self.current_block = 0;
        self.bit_index = 0;
    }

    /// Fill the internal block state, updating our cursor position in the process.
    ///
    /// This tries to read up to `usize` bytes from the underlying data,
    /// reading fewer if a full block isn't available.
    fn load_current_block(&mut self) -> Result<(), Error> {
        if self.tell_byte() >= self.byte_len() {
            return Err(Error::Eof);
        }

        // NOTE(ww): We've consumed all of the bits in our current block, so clear our state.
        // This is essential to the correct behavior of `load_current_block`,
        // as it uses `tell_byte` to determine which byte to begin at for the next block load.
        self.clear_block_state();

        // Do either a full or a short read, depending on how much data
        // we have left.
        let block_bytes = if self.tell_byte() + Self::BLOCK_SIZE < self.byte_len() {
            &self.inner.as_ref()[self.tell_byte()..(self.tell_byte() + Self::BLOCK_SIZE)]
        } else {
            &self.inner.as_ref()[self.tell_byte()..self.byte_len()]
        };

        // Assemble the block little-endian: byte 0 occupies the lowest 8 bits,
        // so bits are consumed in stream order by shifting right.
        self.current_block = 0;
        for (idx, byte) in block_bytes.iter().enumerate() {
            self.current_block |= (*byte as u64) << (idx * 8);
        }

        // We've advanced by this many bytes.
        self.byte_pos += block_bytes.len();

        // We have this many valid bits in the current block.
        self.bit_index = block_bytes.len() * 8;

        log::debug!(
            "load_current_block finished: current_block={}, bit_index={}",
            self.current_block,
            self.bit_index
        );

        Ok(())
    }

    /// Read `nbits` bits of data at the current position. The data is returned
    /// as a `u64`.
    ///
    /// Returns an error if the requested read is invalid (e.g. EOF or not enough data)
    /// or if `nbits` is invalid (zero, or >= `u64::BITS`).
    pub fn read(&mut self, nbits: usize) -> Result<u64, Error> {
        log::debug!(
            "read: nbits={}, current_block={}, bit_index={}",
            nbits,
            self.current_block,
            self.bit_index
        );

        // Reject degenerate sizes up front; the mask construction below
        // requires 0 < nbits < 64.
        if nbits == 0 || nbits >= Self::BLOCK_SIZE_BITS {
            return Err(Error::InvalidReadSize);
        }

        // If we have enough bits in the current block, steal them and
        // return fast.
        if self.bit_index >= nbits {
            log::debug!("we have enough bits!");

            // `!0 >> (64 - nbits)` is a mask of the low `nbits` bits.
            let read = self.current_block & (!0 >> (Self::BLOCK_SIZE_BITS - nbits));

            self.current_block >>= nbits;
            self.bit_index -= nbits;

            return Ok(read);
        }

        // If we don't have enough bits, use the ones we have and fetch
        // a new `current_block`, completing the read with its contents.
        // `part_1` is all remaining buffered bits (already right-aligned).
        let bits_left = nbits - self.bit_index;
        let part_1 = if self.bit_index > 0 {
            self.current_block
        } else {
            0
        };

        self.load_current_block()?;

        // `load_current_block` might succeed, but might not load in enough
        // bits to fully service the read.
        if bits_left > self.bit_index {
            return Err(Error::Short);
        }

        // Take the low `bits_left` bits of the freshly loaded block.
        let part_2 = self.current_block & (!0 >> (Self::BLOCK_SIZE_BITS - bits_left));

        self.current_block >>= bits_left;
        self.bit_index -= bits_left;

        log::debug!(
            "part_2 done: current_block={}, bit_index={}",
            self.current_block,
            self.bit_index
        );

        // Mash the parts together.
        // `part_1` holds the first (lower-order) `nbits - bits_left` bits of the
        // result, so `part_2` is shifted up past them.
        Ok(part_1 | (part_2 << (nbits - bits_left)))
    }

    /// Read a `nbits` of data at the current position into the given scalar type.
    ///
    /// Returns an error under all of the same conditions as [`read`](BitCursor::read),
    /// as well as if the read value doesn't fit into the given scalar.
    pub fn read_as<Int: NumCast>(&mut self, nbits: usize) -> Result<Int, Error> {
        let res: Int = num::cast(self.read(nbits)?).ok_or(Error::BadCast)?;
        Ok(res)
    }

    /// Read exactly the size of `Int` at the current position.
    ///
    /// Returns an error under all of the same conditions as [`read`](BitCursor::read).
    pub fn read_exact<Int: NumCast>(&mut self) -> Result<Int, Error> {
        self.read_as::<Int>(Int::bitsize())
    }

    /// Read a `width`-wide VBR-encoded integer.
    ///
    /// This function returns only unsigned integers. For signed integers,
    /// use `read_svbr`.
    #[cfg(any(feature = "vbr", doc))]
    pub fn read_vbr(&mut self, width: usize) -> Result<u64, Error> {
        // Sanity check: widths under 2 can't be VBR encodings, and, like LLVM itself,
        // we simply don't support widths above 32.
        if !(2..=Self::MAX_VBR_BITS).contains(&width) {
            return Err(Error::InvalidVbrWidth);
        }

        // The high bit of each block is the continuation bit.
        let block_mask = 1 << (width - 1);

        // Read each VBR block until we encounter a block that doesn't include the
        // continuation bit.
        let mut result: u64 = 0;
        let mut shift = 0;
        loop {
            // Read a block, add it to the result (with the potential continuation bit masked off)
            let block = self.read(width)?;
            log::debug!("block: {:#b}, masked: {:#b}", block, block & !block_mask);
            result |= (block & !block_mask) << shift;

            // If we don't have a continuation bit, then we're done with the VBR.
            let continuation = (block & block_mask) != 0;
            if !continuation {
                break;
            };

            // Calculate the shift needed for the next block.
            // Each block contributes `width - 1` payload bits.
            shift += width - 1;
        }

        Ok(result)
    }

    /// Return a `width`-side signed VBR-encoded integer from `cursor`.
    ///
    /// This function returns only signed integers, assuming LLVM's signed VBR
    /// representation.
    #[cfg(any(feature = "vbr", doc))]
    pub fn read_svbr(&mut self, width: usize) -> Result<isize, Error> {
        let mut result = self.read_vbr(width)?;

        // The lowest bit indicates the actual sign: high for negative and low for positive.
        let sgn = (result & 1) != 0;
        result >>= 1;

        if sgn {
            Ok(-(result as isize))
        } else {
            Ok(result as isize)
        }
    }

    /// Align the stream on the next 32-bit boundary.
    ///
    /// Any data consumed during alignment is discarded.
    ///
    /// NOTE(review): this assumes block loads begin on 32-bit stream
    /// boundaries (true for LLVM bitstreams, whose lengths are multiples of
    /// 4 bytes); an odd-length trailing block would not align exactly — TODO
    /// confirm that callers never rely on `align32` for such inputs.
    pub fn align32(&mut self) {
        log::debug!("aligning the cursor");

        if self.bit_index >= 32 {
            // Keep the buffered bits past the boundary; drop the rest.
            self.current_block >>= self.bit_index - 32;
            self.bit_index = 32;
        } else {
            // Fewer than 32 buffered bits: discard them entirely, leaving us
            // at the block boundary recorded by `byte_pos`.
            self.clear_block_state();
        }
    }
}
349
350/// A `Seek` implementation for `BitCursor`.
351///
352/// Seeking past the end of a `BitCursor` is always invalid, and always returns
353/// an error.
354///
355/// NOTE: This is a byte-granular implementation of `Seek`.
356/// For bit-granular seeking, use [`seek_bit`](BitCursor::seek_bit).
357impl<T: AsRef<[u8]>> io::Seek for BitCursor<T> {
358    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
359        // Note the ugly as-casting below: we ultimately turn `off` into a
360        // `usize` to make it compatible with indexing (since we always have
361        // a backing buffer), but we first have to round-trip it through i64
362        // for relative seeks.
363        let off = match pos {
364            io::SeekFrom::Start(pos) => pos,
365            io::SeekFrom::End(pos) => {
366                if pos >= 0 {
367                    return Err(io::Error::new(
368                        io::ErrorKind::Unsupported,
369                        "cannot seek past end",
370                    ));
371                }
372
373                // Seeking backwards from the end is perfectly fine.
374                ((self.byte_len() as i64) + pos) as u64
375            }
376            io::SeekFrom::Current(pos) => ((self.tell_byte() as i64) + pos) as u64,
377        } as usize;
378
379        // Sanity check: we can't seek before or beyond the backing buffer.
380        // We can, however, seek to the exact end of the backing buffer, to
381        // indicate an EOF condition.
382        // We don't need to check for a negative offset here, since we've cast
383        // back into the land of unsigned integers.
384        if off > self.byte_len() {
385            return Err(io::Error::new(
386                io::ErrorKind::InvalidInput,
387                "impossible seek requested",
388            ));
389        }
390
391        // Actually update our location.
392        self.byte_pos = off;
393
394        // Regardless of the kind of seek, we reset our current block state to ensure that any
395        // subsequent reads are correct.
396        self.clear_block_state();
397
398        Ok(off as u64)
399    }
400
401    fn stream_position(&mut self) -> io::Result<u64> {
402        Ok(self.tell_byte() as u64)
403    }
404
405    // TODO(ww): Supply this when it's stabilized.
406    // fn stream_len(&mut self) -> io::Result<u64> {
407    //     Ok(self.byte_len() as u64)
408    // }
409}
410
#[cfg(test)]
mod tests {
    use std::io::Seek;

    use super::*;

    // Convenience constructor: wrap a byte slice in a cursor.
    fn cursor(buf: &[u8]) -> BitCursor<&[u8]> {
        BitCursor::new(&buf)
    }

    #[test]
    fn test_new_with_len_invalid_length() {
        // A requested length longer than the buffer is rejected.
        assert!(BitCursor::new_with_len(&[0xff, 0xee], 3).is_err());
    }

    #[test]
    fn test_read_basic() {
        let mut cur = cursor(&[0b00011011]);

        // Our initial state is reasonable.
        assert_eq!(cur.bit_len(), 8);
        assert_eq!(cur.byte_len(), 1);
        assert_eq!(cur.tell_bit(), 0);
        assert_eq!(cur.tell_byte(), 0);

        // After each read, we advance by the appropriate number of bits/bytes.
        // Bits come out LSB first: the low two bits (0b11) are read first.
        assert_eq!(cur.read(2).unwrap(), 0b11);
        assert_eq!(cur.tell_bit(), 2);
        assert_eq!(cur.tell_byte(), 0);
        assert_eq!(cur.read(2).unwrap(), 0b10);
        assert_eq!(cur.tell_bit(), 4);
        assert_eq!(cur.tell_byte(), 0);
        assert_eq!(cur.read(2).unwrap(), 0b01);
        assert_eq!(cur.tell_bit(), 6);
        assert_eq!(cur.tell_byte(), 0);
        assert_eq!(cur.read(2).unwrap(), 0b00);
        assert_eq!(cur.tell_bit(), 8);
        assert_eq!(cur.tell_byte(), 1);

        // We've fully consumed the stream, so this read produces an error.
        assert!(cur.read(1).is_err());
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_invalid_reads() {
        let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]);

        // VBRs over 32 bits just aren't supported.
        assert!(cur.read_vbr(33).is_err());

        // Normal reads >= 64 bits just aren't supported.
        assert!(cur.read(64).is_err());
    }

    #[test]
    fn test_read_multiple_sizes() {
        let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0b10010101]);

        // Multi-byte reads assemble little-endian.
        assert_eq!(cur.read(24).unwrap(), 0xCCBBAA);
        assert_eq!(cur.read(5).unwrap(), 0b10101);
        assert_eq!(cur.read(3).unwrap(), 0b100);

        // We've fully consumed the stream, so this read produces an error.
        assert!(cur.read(1).is_err());
    }

    #[test]
    fn test_read_bounds() {
        let mut cur = cursor(&[0xAA]);

        // Reads below 1 bit or above the usize bitwidth aren't allowed.
        assert!(cur.read(0).is_err());
        assert!(cur.read(usize::BITS as usize + 1).is_err());
    }

    #[test]
    fn test_read_llvm_wrapper_magic() {
        let mut cur = cursor(&[0xde, 0xc0, 0x17, 0x0b]);

        assert_eq!(cur.read(32).unwrap(), 0x0B17C0DE);
    }

    #[test]
    fn test_read_llvm_raw_magic() {
        let mut cur = cursor(&[b'B', b'C', 0xc0, 0xde]);

        assert_eq!(cur.read(32).unwrap(), 0xdec04342);
    }

    #[test]
    fn test_read_across_blocks() {
        // Reads that straddle the internal 8-byte block boundary.
        #[rustfmt::skip]
        let mut cur = cursor(&[
            0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
            0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
        ]);

        assert_eq!(cur.read(56).unwrap(), 0x77665544332211);
        assert_eq!(cur.read(24).unwrap(), 0x221188);
    }

    #[test]
    fn test_read_across_blocks_unaligned() {
        #[rustfmt::skip]
        let mut cur = cursor(&[
            0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0b11111111,
            0b00011001, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
        ]);

        assert_eq!(cur.read(56).unwrap(), 0x77665544332211);
        assert_eq!(cur.read(5).unwrap(), 0b11111);
        // This 5-bit read crosses the block boundary.
        assert_eq!(cur.read(5).unwrap(), 0b01111);
        assert_eq!(cur.read(6).unwrap(), 0b000110);
    }

    #[test]
    fn test_read_and_align() {
        #[rustfmt::skip]
        let mut cur = cursor(&[
            0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0b11111111,
            0b00011001, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
        ]);

        assert_eq!(cur.read(56).unwrap(), 0x77665544332211);
        assert_eq!(cur.read(5).unwrap(), 0b11111);
        assert_eq!(cur.read(5).unwrap(), 0b01111);
        // Alignment discards the remainder of the current 32-bit word.
        cur.align32();
        assert_eq!(cur.read(8).unwrap(), 0x55);
        assert_eq!(cur.read(16).unwrap(), 0x7766);
        assert_eq!(cur.read(5).unwrap(), 0b01000);
        assert_eq!(cur.read(3).unwrap(), 0b100);
        assert!(cur.read(1).is_err());
    }

    #[test]
    fn test_read_as() {
        let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0xDD]);

        assert_eq!(cur.read_as::<u16>(16).unwrap(), 0xBBAA);
        assert_eq!(cur.read_as::<u32>(16).unwrap(), 0xDDCC);
    }

    #[test]
    fn test_read_as_bounds() {
        let mut cur = cursor(&[0xFF, 0xFF, 0xFF, 0xFF]);

        // Attempting to read a value into a type that can't hold that value
        // produces an error.
        assert!(cur.read_as::<u16>(17).is_err());
    }

    #[test]
    fn test_read_exact() {
        let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0xDD, 0xEE]);

        // read_exact reads exactly the type's bitsize.
        assert_eq!(cur.read_exact::<u32>().unwrap(), 0xDDCCBBAA);
        assert_eq!(cur.tell_bit(), u32::BITS as usize);
        assert_eq!(cur.tell_byte(), (u32::BITS / 8) as usize);

        assert_eq!(cur.read_exact::<u8>().unwrap(), 0xEE);
        assert_eq!(cur.tell_bit(), (u32::BITS + u8::BITS) as usize);
        assert_eq!(cur.tell_byte(), ((u32::BITS + u8::BITS) / 8) as usize);
    }

    #[test]
    fn test_seek_bit() {
        let mut cur = cursor(&[0b1111_1110, 0b1010_0111]);

        assert_eq!(cur.read(4).unwrap(), 0b1110);

        // Seek halfway into the first byte.
        cur.seek_bit(4).unwrap();
        assert_eq!(cur.tell_bit(), 4);
        assert_eq!(cur.tell_byte(), 0);
        assert_eq!(cur.bit_index, 12);

        // Read the next byte's worth.
        // NOTE(ww): The value here is unintuitive from the cursor initialization
        // above: remember that values are always read LSB first, so our next byte
        // comes from the low nibble of the 2nd byte of input plus the high nibble
        // of the 1st.
        assert_eq!(cur.read(8).unwrap(), 0b0111_1111);
        assert_eq!(cur.tell_bit(), 12);
        assert_eq!(cur.tell_byte(), 1);
        assert_eq!(cur.bit_index, 4);

        // Consume the last nibble.
        assert_eq!(cur.read(4).unwrap(), 0b1010);

        // Sanity check: we should be fully consumed.
        assert!(cur.read(1).is_err());
    }

    #[test]
    fn test_seek() {
        let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0xDD]);

        // Consume the whole thing, putting us at the end.
        assert_eq!(cur.read(32).unwrap(), 0xDDCCBBAA);
        assert_eq!(cur.tell_bit(), 32);
        assert_eq!(cur.tell_byte(), 4);

        // Seek relative, backwards by two bytes.
        cur.seek(io::SeekFrom::Current(-2)).unwrap();
        assert_eq!(cur.tell_bit(), 16);
        assert_eq!(cur.tell_byte(), 2);
        assert_eq!(cur.read(16).unwrap(), 0xDDCC);
        assert_eq!(cur.tell_bit(), 32);
        assert_eq!(cur.tell_byte(), 4);

        // Go back to the start.
        cur.seek(io::SeekFrom::Start(0)).unwrap();
        assert_eq!(cur.tell_bit(), 0);
        assert_eq!(cur.tell_byte(), 0);
        assert_eq!(cur.read(32).unwrap(), 0xDDCCBBAA);
        assert_eq!(cur.tell_bit(), 32);
        assert_eq!(cur.tell_byte(), 4);

        // Seek somewhere in the middle.
        cur.seek(io::SeekFrom::Start(1)).unwrap();
        assert_eq!(cur.tell_bit(), 8);
        assert_eq!(cur.tell_byte(), 1);
        assert_eq!(cur.read(8).unwrap(), 0xBB);

        // Seek backwards from the end.
        cur.seek(io::SeekFrom::End(-1)).unwrap();
        assert_eq!(cur.tell_bit(), 24);
        assert_eq!(cur.tell_byte(), 3);
        assert_eq!(cur.read(8).unwrap(), 0xDD);

        // Seeking past the end is completely unsupported.
        assert!(cur.seek(io::SeekFrom::End(1)).is_err())
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_vbr2_continuation() {
        let mut cur = cursor(&[0b01101011]);

        assert_eq!(cur.read_vbr(2).unwrap(), 9);
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_vbr4_basic() {
        let mut cur = cursor(&[0b00000111]);

        assert_eq!(cur.read_vbr(4).unwrap(), 7);
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_vbr4_continuation() {
        let mut cur = cursor(&[0b0011_1011]);

        assert_eq!(cur.read_vbr(4).unwrap(), 27);
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_vbr6_basic() {
        let mut cur = cursor(&[0b00_010000]);
        assert_eq!(cur.read_vbr(6).unwrap(), 16);
        assert_eq!(cur.tell_bit(), 6);
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_vbr6_continuation() {
        let mut cur = cursor(&[0b01_100001, 0b0011_1001, 0b100111_00]);
        assert_eq!(cur.read_vbr(6).unwrap(), 3233);
        assert_eq!(cur.read(3).unwrap(), 0b111);
        assert_eq!(cur.read(3).unwrap(), 0b100);
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_svbr4() {
        // -3 as a signed VBR4 is `(-(-3) << 1) | 1`, i.e. 7, i.e. 0b0111.
        let mut cur = cursor(&[0b0000_0111]);

        assert_eq!(cur.read_svbr(4).unwrap(), -3);
    }

    #[test]
    fn test_align32() {
        {
            let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]);

            assert_eq!(cur.read(8).unwrap(), 0xAA);
            cur.align32();
            assert_eq!(cur.tell_bit(), 32);
            assert_eq!(cur.read(8).unwrap(), 0xEE);
            assert_eq!(cur.read(24).unwrap(), 0x2211FF);
            assert_eq!(cur.tell_bit(), 64);
        }

        {
            let mut cur = cursor(&[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]);

            // Aligning an already-aligned cursor is a no-op.
            cur.align32();
            assert_eq!(cur.tell_bit(), 0);

            cur.read(32).unwrap();
            cur.align32();
            assert_eq!(cur.tell_bit(), 32);
        }

        {
            #[rustfmt::skip]
            let mut cur = cursor(&[
                0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
                0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00,
            ]);

            cur.read(63).unwrap();
            cur.read(1).unwrap();
            assert_eq!(cur.tell_bit(), 64);
            cur.align32();
            assert_eq!(cur.tell_bit(), 64);

            cur.seek_bit(0).unwrap();
            cur.read(63).unwrap();
            cur.read(1).unwrap();
            cur.read(32).unwrap();
            cur.align32();
            assert_eq!(cur.tell_bit(), 96);
            cur.read(1).unwrap();
            cur.align32();
            assert_eq!(cur.tell_bit(), 128);
        }
    }

    #[test]
    fn test_align32_unaligned() {
        let mut cur = cursor(&[0b00011100, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22]);

        assert_eq!(cur.read(5).unwrap(), 0b11100);
        cur.align32();
        assert_eq!(cur.tell_bit(), 32);
        assert_eq!(cur.read(32).unwrap(), 0x2211FFEE);
        assert_eq!(cur.tell_bit(), 64);
    }

    #[test]
    fn test_align32_next_block() {
        {
            #[rustfmt::skip]
            let mut cur = cursor(&[
                0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
                0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00,
            ]);

            cur.read(56).unwrap();
            cur.align32();
            assert_eq!(cur.read(32).unwrap(), 0xCCBBAA99);
            assert_eq!(cur.tell_bit(), 96);
        }

        {
            #[rustfmt::skip]
            let mut cur = cursor(&[
                0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
                0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x00,
            ]);

            cur.read(56).unwrap();
            cur.read(17).unwrap();
            cur.align32();
            assert_eq!(cur.read(32).unwrap(), 0x00FFEEDD);
            assert_eq!(cur.tell_bit(), 128);
        }
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_parse_unabbrev() {
        // assuming abbrev id width=2
        #[rustfmt::skip]
        let mut cur = cursor(&[
            0b0001_01_11, 0b000001_00, 0b00_000110, 0xFF,
            0b0001_01_11, 0b000001_00, 0b00_000110, 0b11111111,
            0b1_01_11_101, 0b001_00000, 0b00000_000, 0b00000011,
        ]);

        assert_eq!(cur.read_vbr(2).unwrap(), 3); // DEFINE_UNABBREV
        assert_eq!(cur.read_vbr(6).unwrap(), 1); // code 1
        assert_eq!(cur.read_vbr(6).unwrap(), 1); // 1 field
        assert_eq!(cur.read_vbr(6).unwrap(), 6); // value 6

        cur.align32();

        assert_eq!(cur.read_vbr(2).unwrap(), 3); // DEFINE_UNABBREV
        assert_eq!(cur.read_vbr(6).unwrap(), 1); // code 1
        assert_eq!(cur.read_vbr(6).unwrap(), 1); // 1 field
        assert_eq!(cur.read_vbr(6).unwrap(), 6); // value 6

        assert_eq!(cur.tell_bit(), 54);
        assert_eq!(cur.read(13).unwrap(), 0b101_11111111_00);
        assert_eq!(cur.read_vbr(2).unwrap(), 3); // DEFINE_UNABBREV
        assert_eq!(cur.read_vbr(6).unwrap(), 1); // code 1
        assert_eq!(cur.read_vbr(6).unwrap(), 1); // 1 field
        assert_eq!(cur.read_vbr(6).unwrap(), 32); // value 32
    }

    #[cfg(feature = "vbr")]
    #[test]
    fn test_pseudo_bitstream1() {
        let bytes = b"\xAA\xAA\x42\x43\xC0\xDE\x35\x14\x00\x00\x05\x00\x00\x00\x62\x0C";
        let mut cur = cursor(bytes);

        assert_eq!(cur.read(16).unwrap(), 0xAAAA);
        assert_eq!(cur.read(32).unwrap(), 0xDEC04342);
        assert_eq!(cur.read(2).unwrap(), 0b01); // ENTER_SUBBLOCK
        assert_eq!(cur.read_vbr(8).unwrap(), 13); // Block ID #13 (IDENTIFICATION_BLOCK)
        assert_eq!(cur.read_vbr(5).unwrap(), 5); // New abbrev width=5
        assert_eq!(cur.bit_index, 1);
        assert_eq!(cur.tell_bit(), 63);
        cur.align32();
        assert_eq!(cur.bit_index, 0);
        assert_eq!(cur.current_block, 0);
        assert_eq!(cur.tell_bit(), 64);
        cur.read(16).unwrap();
        assert_eq!(cur.read(32).unwrap(), 5);
    }
}
838}