bitreader/lib.rs
1// Copyright 2015 Ilkka Rauta
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! BitReader is a helper type to extract strings of bits from a slice of bytes.
10//!
11//! Here is how you read first a single bit, then three bits and finally four bits from a byte
12//! buffer:
13//!
14//! ```
15//! use bitreader::BitReader;
16//!
17//! let slice_of_u8 = &[0b1000_1111];
18//! let mut reader = BitReader::new(slice_of_u8);
19//!
20//! // You probably should use try! or some other error handling mechanism in real code if the
21//! // length of the input is not known in advance.
22//! let a_single_bit = reader.read_u8(1).unwrap();
23//! assert_eq!(a_single_bit, 1);
24//!
25//! let more_bits = reader.read_u8(3).unwrap();
26//! assert_eq!(more_bits, 0);
27//!
28//! let last_bits_of_byte = reader.read_u8(4).unwrap();
29//! assert_eq!(last_bits_of_byte, 0b1111);
30//! ```
31//! You can naturally read bits from longer buffer of data than just a single byte.
32//!
33//! As you read bits, the internal cursor of BitReader moves on along the stream of bits. Big
34//! endian format is assumed when reading the multi-byte values. BitReader supports reading maximum
35//! of 64 bits at a time (with read_u64). Reading signed values directly is not supported at the
36//! moment.
37//!
38//! The reads do not need to be aligned in any particular way.
39//!
40//! Reading zero bits is a no-op.
41//!
42//! You can also skip over a number of bits, in which case there is no arbitrary small limits like
43//! when reading the values to a variable. However, you can not seek past the end of the slice,
44//! either when reading or when skipping bits.
45//!
46//! Note that the code will likely not work correctly if the slice is longer than 2^61 bytes, but
47//! exceeding that should be pretty unlikely. Let's get back to this when people read exabytes of
48//! information one bit at a time.
49#![no_std]
50cfg_if::cfg_if!{
51    if #[cfg(feature = "std")] {
52        extern crate std;
53        use std::cmp::min;
54        use std::prelude::v1::*;
55        use std::fmt;
56        use std::error::Error;
57        use std::result;
58    } else {
59        use core::result;
60        use core::fmt;
61        use core::cmp::min;
62    }
63}
64
65#[cfg(test)]
66mod tests;
67
68/// BitReader reads data from a byte slice at the granularity of a single bit.
69pub struct BitReader<'a> {
70    bytes: &'a [u8],
71    /// Position from the start of the slice, counted as bits instead of bytes
72    position: u64,
73    relative_offset: u64,
74
75    /// Length this reader is allowed to read from the slice, counted as bits instead of bytes.
76    length: u64,
77}
78
79impl<'a> BitReader<'a> {
80    /// Construct a new BitReader from a byte slice. The returned reader lives at most as long as
81    /// the slice given to is valid.
82    #[inline]
83    pub fn new(bytes: &'a [u8]) -> BitReader<'a> {
84        BitReader {
85            bytes: bytes,
86            position: 0,
87            relative_offset: 0,
88            length: bytes.len() as u64 * 8,
89        }
90    }
91
92    /// Returns a copy of current BitReader, with the difference that its position() returns
93    /// positions relative to the position of the original BitReader at the construction time.
94    /// After construction, both readers are otherwise completely independent, except of course
95    /// for sharing the same source data.
96    ///
97    /// ```
98    /// use bitreader::BitReader;
99    ///
100    /// let bytes = &[0b11110000, 0b00001111];
101    /// let mut original = BitReader::new(bytes);
102    /// assert_eq!(original.read_u8(4).unwrap(), 0b1111);
103    /// assert_eq!(original.position(), 4);
104    ///
105    /// let mut relative = original.relative_reader();
106    /// assert_eq!(relative.position(), 0);
107    ///
108    /// assert_eq!(original.read_u8(8).unwrap(), 0);
109    /// assert_eq!(relative.read_u8(8).unwrap(), 0);
110    ///
111    /// assert_eq!(original.position(), 12);
112    /// assert_eq!(relative.position(), 8);
113    /// ```
114    #[inline]
115    pub fn relative_reader(&self) -> BitReader<'a> {
116        BitReader {
117            bytes: self.bytes,
118            position: self.position,
119            relative_offset: self.position,
120            length: self.length - self.position(),
121        }
122    }
123
124    /// Returns a copy of current BitReader, with the difference that its position() returns
125    /// positions relative to the position of the original BitReader at the construction time, and
126    /// will not allow reading more than len bits. After construction, both readers are otherwise
127    // completely independent, except of course for sharing the same source data.
128    ///
129    /// ```
130    /// use bitreader::BitReader;
131    /// use bitreader::BitReaderError;
132    ///
133    /// let bytes = &[0b11110000, 0b00001111];
134    /// let mut original = BitReader::new(bytes);
135    /// assert_eq!(original.read_u8(4).unwrap(), 0b1111);
136    /// assert_eq!(original.position(), 4);
137    ///
138    /// let mut relative = original.relative_reader_atmost(8);
139    /// assert_eq!(relative.position(), 0);
140    ///
141    /// assert_eq!(original.read_u8(8).unwrap(), 0);
142    /// assert_eq!(relative.read_u8(8).unwrap(), 0);
143    ///
144    /// assert_eq!(original.position(), 12);
145    /// assert_eq!(relative.position(), 8);
146    ///
147    /// assert_eq!(relative.read_u8(8).unwrap_err(), BitReaderError::NotEnoughData{
148    ///    position: 8,
149    ///    length: 8,
150    ///    requested: 8
151    /// });
152    /// ```
153    #[inline]
154    pub fn relative_reader_atmost(&self, len: u64) -> BitReader<'a> {
155        BitReader {
156            bytes: self.bytes,
157            position: self.position,
158            relative_offset: self.position,
159            length: min(self.length - self.position(), len),
160        }
161    }
162
163    /// Read at most 8 bits into a u8.
164    #[inline]
165    pub fn read_u8(&mut self, bit_count: u8) -> Result<u8> {
166        let value = self.read_value(bit_count, 8)?;
167        Ok((value & 0xff) as u8)
168    }
169
170    /// Read at most 8 bits into a u8, but without moving the cursor forward.
171    #[inline]
172    pub fn peek_u8(&self, bit_count: u8) -> Result<u8> {
173        self.relative_reader().read_u8(bit_count)
174    }
175
176    /// Fills the entire `output_bytes` slice. If there aren't enough bits remaining
177    /// after the internal cursor's current position, the cursor won't be moved forward
178    /// and the contents of `output_bytes` won't be modified.
179    pub fn read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()> {
180        let requested = output_bytes.len() as u64 * 8;
181        if requested > self.remaining() {
182            Err(BitReaderError::NotEnoughData {
183                position: self.position(),
184                length: self.length,
185                requested,
186            })
187        } else {
188            for byte in output_bytes.iter_mut() {
189                *byte = self.read_u8(8)?;
190            }
191            Ok(())
192        }
193    }
194
195    /// Read at most 16 bits into a u16.
196    #[inline]
197    pub fn read_u16(&mut self, bit_count: u8) -> Result<u16> {
198        let value = self.read_value(bit_count, 16)?;
199        Ok((value & 0xffff) as u16)
200    }
201
202    /// Read at most 16 bits into a u16, but without moving the cursor forward.
203    #[inline]
204    pub fn peek_u16(&self, bit_count: u8) -> Result<u16> {
205        self.relative_reader().read_u16(bit_count)
206    }
207
208    /// Read at most 32 bits into a u32.
209    #[inline]
210    pub fn read_u32(&mut self, bit_count: u8) -> Result<u32> {
211        let value = self.read_value(bit_count, 32)?;
212        Ok((value & 0xffffffff) as u32)
213    }
214
215    /// Read at most 32 bits into a u32, but without moving the cursor forward.
216    #[inline]
217    pub fn peek_u32(&self, bit_count: u8) -> Result<u32> {
218        self.relative_reader().read_u32(bit_count)
219    }
220
221    /// Read at most 64 bits into a u64.
222    #[inline]
223    pub fn read_u64(&mut self, bit_count: u8) -> Result<u64> {
224        let value = self.read_value(bit_count, 64)?;
225        Ok(value)
226    }
227
228    /// Read at most 64 bits into a u64, but without moving the cursor forward.
229    #[inline]
230    pub fn peek_u64(&self, bit_count: u8) -> Result<u64> {
231        self.relative_reader().read_u64(bit_count)
232    }
233
234    /// Read at most 8 bits into a i8.
235    /// Assumes the bits are stored in two's complement format.
236    #[inline]
237    pub fn read_i8(&mut self, bit_count: u8) -> Result<i8> {
238        let value = self.read_signed_value(bit_count, 8)?;
239        Ok((value & 0xff) as i8)
240    }
241
242    /// Read at most 16 bits into a i16.
243    /// Assumes the bits are stored in two's complement format.
244    #[inline]
245    pub fn read_i16(&mut self, bit_count: u8) -> Result<i16> {
246        let value = self.read_signed_value(bit_count, 16)?;
247        Ok((value & 0xffff) as i16)
248    }
249
250    /// Read at most 32 bits into a i32.
251    /// Assumes the bits are stored in two's complement format.
252    #[inline]
253    pub fn read_i32(&mut self, bit_count: u8) -> Result<i32> {
254        let value = self.read_signed_value(bit_count, 32)?;
255        Ok((value & 0xffffffff) as i32)
256    }
257
258    /// Read at most 64 bits into a i64.
259    /// Assumes the bits are stored in two's complement format.
260    #[inline]
261    pub fn read_i64(&mut self, bit_count: u8) -> Result<i64> {
262        let value = self.read_signed_value(bit_count, 64)?;
263        Ok(value)
264    }
265
266    /// Read a single bit as a boolean value.
267    /// Interprets 1 as true and 0 as false.
268    #[inline]
269    pub fn read_bool(&mut self) -> Result<bool> {
270        match self.read_value(1, 1)? {
271            0 => Ok(false),
272            _ => Ok(true),
273        }
274    }
275
276    /// Read a single bit as a boolean value, but without moving the cursor forward.
277    /// Interprets 1 as true and 0 as false.
278    #[inline]
279    pub fn peek_bool(&self) -> Result<bool> {
280        self.relative_reader().read_bool()
281    }
282
283    /// Skip arbitrary number of bits. However, you can skip at most to the end of the byte slice.
284    pub fn skip(&mut self, bit_count: u64) -> Result<()> {
285        let end_position = self.position + bit_count;
286        if end_position > (self.relative_offset + self.length) {
287            return Err(BitReaderError::NotEnoughData {
288                position: self.position(),
289                length: self.length,
290                requested: bit_count,
291            });
292        }
293        self.position = end_position;
294        Ok(())
295    }
296
297    /// Returns the position of the cursor, or how many bits have been read so far.
298    #[inline]
299    pub fn position(&self) -> u64 {
300        self.position - self.relative_offset
301    }
302
303    /// Returns the number of bits not yet read from the underlying slice.
304    #[inline]
305    pub fn remaining(&self) -> u64 {
306        self.length - self.position()
307    }
308
309    /// Helper to make sure the "bit cursor" is exactly at the beginning of a byte, or at specific
310    /// multi-byte alignment position.
311    ///
312    /// For example `reader.is_aligned(1)` returns true if exactly n bytes, or n * 8 bits, has been
313    /// read. Similarly, `reader.is_aligned(4)` returns true if exactly n * 32 bits, or n 4-byte
314    /// sequences has been read.
315    ///
316    /// This function can be used to validate the data is being read properly, for example by
317    /// adding invocations wrapped into `debug_assert!()` to places where it is known the data
318    /// should be n-byte aligned.
319    #[inline]
320    pub fn is_aligned(&self, alignment_bytes: u32) -> bool {
321        self.position % (alignment_bytes as u64 * 8) == 0
322    }
323
324    /// Helper to move the "bit cursor" to exactly the beginning of a byte, or to a specific
325    /// multi-byte alignment position.
326    ///
327    /// That is, `reader.align(n)` moves the cursor to the next position that
328    /// is a multiple of n * 8 bits, if it's not correctly aligned already.
329    pub fn align(&mut self, alignment_bytes: u32) -> Result<()> {
330        let alignment_bits = alignment_bytes as u64 * 8;
331        let cur_alignment = self.position % alignment_bits;
332        let bits_to_skip = (alignment_bits - cur_alignment) % alignment_bits;
333        self.skip(bits_to_skip)
334    }
335
336    #[inline]
337    fn read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64> {
338        if bit_count == 0 {
339            return Ok(0);
340        }
341        let unsigned = self.read_value(bit_count, maximum_count)?;
342        // Fill the bits above the requested bits with all ones or all zeros,
343        // depending on the sign bit.
344        let sign_bit = unsigned >> (bit_count - 1) & 1;
345        let high_bits = if sign_bit == 1 { -1 } else { 0 };
346        if bit_count == 64 {
347            // Avoid left-shift-with-overflow exception
348            return Ok(unsigned as i64);
349        }
350        Ok(high_bits << bit_count | unsigned as i64)
351    }
352
353    #[cold]
354    fn too_many_bits_for_type_error(&self, bit_count: u8, maximum_count: u8) -> Result<u64> {
355        Err(BitReaderError::TooManyBitsForType {
356            position: self.position,
357            requested: bit_count,
358            allowed: maximum_count,
359        })
360    }
361
362    #[inline]
363    // Inlining allows the size checks to be eliminated for constant values
364    fn read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64> {
365        if bit_count == 0 {
366            return Ok(0);
367        }
368        if bit_count > maximum_count {
369            return self.too_many_bits_for_type_error(bit_count, maximum_count);
370        }
371        self.read_bits(bit_count)
372    }
373
374    fn read_bits(&mut self, bit_count: u8) -> Result<u64> {
375        let start_position = self.position;
376        let end_position = self.position + bit_count as u64;
377        if end_position > (self.relative_offset + self.length) {
378            return Err(BitReaderError::NotEnoughData {
379                position: self.position(),
380                length: self.length,
381                requested: bit_count as u64,
382            });
383        }
384
385        let mut value: u64 = 0;
386
387        for i in start_position..end_position {
388            let byte_index = (i / 8) as usize;
389            // This will never fail, but prevents Rust from generating panic code
390            let byte = if let Some(byte) = self.bytes.get(byte_index).copied() {
391                byte
392            } else {
393                break;
394            };
395            let shift = 7 - (i % 8);
396            let bit = (byte >> shift) as u64 & 1;
397            value = (value << 1) | bit;
398        }
399
400        self.position = end_position;
401        Ok(value)
402    }
403}
404
405/// Result type for those BitReader operations that can fail.
406pub type Result<T> = result::Result<T, BitReaderError>;
407
408/// Error enumeration of BitReader errors.
409#[derive(Debug,PartialEq,Copy,Clone)]
410pub enum BitReaderError {
411    /// Requested more bits than there are left in the byte slice at the current position.
412    NotEnoughData {
413        /// Current posititon in bits relative to the beginning of the reader.
414        position: u64,
415
416        /// Total readable length in bits of the underlaying slice.
417        length: u64,
418
419        /// Bits requested to be read.
420        requested: u64,
421    },
422    /// Requested more bits than the returned variable can hold, for example more than 8 bits when
423    /// reading into a u8.
424    TooManyBitsForType {
425        position: u64,
426        requested: u8,
427        allowed: u8,
428    }
429}
430
431#[cfg(feature = "std")]
432impl Error for BitReaderError {
433    fn description(&self) -> &str {
434        match *self {
435            BitReaderError::NotEnoughData {..} => "Requested more bits than the byte slice has left",
436            BitReaderError::TooManyBitsForType {..} => "Requested more bits than the requested integer type can hold",
437        }
438    }
439}
440
441impl fmt::Display for BitReaderError {
442    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
443        //self.description().fmt(fmt)
444        match *self {
445            BitReaderError::NotEnoughData { position, length, requested } => write!(fmt, "BitReader: Requested {} bits with only {}/{} bits left (position {})", requested, length - position, length, position),
446            BitReaderError::TooManyBitsForType { position, requested, allowed } => write!(fmt, "BitReader: Requested {} bits while the type can only hold {} (position {})", requested, allowed, position),
447        }
448    }
449}
450
451/// Helper trait to allow reading bits into a variable without explicitly mentioning its type.
452///
453/// If you can't or want, for some reason, to use BitReader's read methods (`read_u8` etc.) but
454/// want to rely on type inference instead, you can use the ReadInto trait. The trait is
455/// implemented for all basic integer types (8/16/32/64 bits, signed/unsigned)
456/// and the boolean type.
457///
458/// ```
459/// use bitreader::{BitReader,ReadInto};
460///
461/// let slice_of_u8 = &[0b1110_0000];
462/// let mut reader = BitReader::new(slice_of_u8);
463///
464/// struct Foo {
465///     bar: u8,
466///     valid: bool,
467/// }
468///
469/// // No type mentioned here, instead the type of bits is inferred from the type of Foo::bar,
470/// // and consequently the correct "overload" is used.
471/// let bits = ReadInto::read(&mut reader, 2).unwrap();
472/// let valid = ReadInto::read(&mut reader, 1).unwrap();
473///
474/// let foo = Foo { bar: bits, valid: valid };
475/// assert_eq!(foo.bar, 3);
476/// assert!(foo.valid);
477/// ```
478pub trait ReadInto
479    where Self: Sized
480{
481    fn read(reader: &mut BitReader, bits: u8) -> Result<Self>;
482}
483
484// There's eight almost identical implementations, let's make this easier.
485macro_rules! impl_read_into {
486    ($T:ty, $method:ident) => (
487        impl ReadInto for $T {
488            fn read(reader: &mut BitReader, bits: u8) -> Result<Self> {
489                reader.$method(bits)
490            }
491        }
492    )
493}
494
495impl_read_into!(u8, read_u8);
496impl_read_into!(u16, read_u16);
497impl_read_into!(u32, read_u32);
498impl_read_into!(u64, read_u64);
499
500impl_read_into!(i8, read_i8);
501impl_read_into!(i16, read_i16);
502impl_read_into!(i32, read_i32);
503impl_read_into!(i64, read_i64);
504
505// We can't cast to bool, so this requires a separate method.
506impl ReadInto for bool {
507    fn read(reader: &mut BitReader, bits: u8) -> Result<Self> {
508        match reader.read_u8(bits)? {
509            0 => Ok(false),
510            _ => Ok(true),
511        }
512    }
513}