Skip to main content

oximedia_bitstream/
lib.rs

1// Copyright 2017 Brian Langenberger
2// Copyright 2024-2026 COOLJAPAN OU (Team Kitasan)
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! Bit-level I/O for OxiMedia — a `std`-only fork of
11//! [`bitstream-io`](https://crates.io/crates/bitstream-io) 4.9.0.
12//!
13//! `oximedia-bitstream` provides traits and structs for reading and writing
14//! signed and unsigned integer values to streams that may not be aligned at
15//! a whole byte.  Both big-endian and little-endian streams are supported.
16//!
17//! The crate is used internally by `oximedia-codec` for entropy coding
18//! (FLAC predictor coefficients, Vorbis header parsing, AV1 OBU parsing, etc.)
19//! and by `oximedia-container` for MP4 box I/O.
20//!
21//! # Core traits
22//!
23//! | Trait | Purpose |
24//! |-------|---------|
25//! | [`BitRead`] | Read bits from a stream, big- or little-endian |
26//! | [`BitWrite`] | Write bits to a stream, big- or little-endian |
27//! | [`ByteRead`] | Read whole bytes from a byte source |
28//! | [`ByteWrite`] | Write whole bytes to any destination |
29//! | [`FromBitStream`] | Deserialise a struct from a bit reader |
30//! | [`ToBitStream`] | Serialise a struct to a bit writer |
31//!
32//! # Concrete types
33//!
34//! - [`BitReader`] — wraps any `std::io::Read` and exposes bit-level reads
35//! - [`BitWriter`] — wraps any `std::io::Write` and exposes bit-level writes
36//! - [`ByteReader`] — wraps any `std::io::Read` for whole-byte reads
37//! - [`ByteWriter`] — wraps any `std::io::Write` for whole-byte writes
38//! - [`BitRecorder`] (feature `alloc`) — records bits written for later replay
39//! - [`BitsWritten`] — counts bits written without a backing writer
40//!
41//! Huffman coding helpers live in the [`huffman`] module via the
42//! [`FromBits`](huffman::FromBits) and [`ToBits`](huffman::ToBits) traits.
43//!
44//! # Quick start
45//!
46//! ```
47//! use std::io::Cursor;
48//! use oximedia_bitstream::{BigEndian, BitReader, BitRead};
49//!
50//! let data = [0b1011_0100u8, 0b1100_1010u8];
51//! let mut r = BitReader::endian(Cursor::new(&data), BigEndian);
52//!
53//! // Constant-bit-count read — validated at compile time (requires Rust 1.79+)
54//! let high: u8 = r.read::<4, _>().unwrap();
55//! assert_eq!(high, 0b1011);
56//!
57//! // Variable-bit-count read
58//! let low: u8 = r.read_var(4).unwrap();
59//! assert_eq!(low, 0b0100);
60//! ```
61//!
62//! # Endianness
63//!
64//! Pass [`BigEndian`] or [`LittleEndian`] as a zero-sized type parameter
65//! (or value) to [`BitReader::endian`] / [`BitWriter::endian`].  The endianness
66//! is a compile-time phantom; switching endianness mid-stream requires creating
67//! a new reader/writer around the same underlying stream.
68//!
69//! # Feature flags
70//!
71//! | Flag | Default | Effect |
72//! |------|---------|--------|
73//! | `std` | yes | Enables `alloc` |
74//! | `alloc` | via `std` | Enables [`BitRecorder`] |
75//!
76//! # Upstream attribution
77//!
78//! This crate is derived from
79//! [`bitstream-io`](https://crates.io/crates/bitstream-io) 4.9.0 by
80//! Brian Langenberger, licensed under Apache-2.0 / MIT.
81//! The OxiMedia fork removes the `core2` / `no_std` compatibility shim
82//! (OxiMedia targets `std` Rust only) and adapts the crate to the
83//! OxiMedia workspace conventions.
84
85//! # Traits and helpers for bitstream handling functionality
86//!
87//! Bitstream readers are for reading signed and unsigned integer
88//! values from a stream whose sizes may not be whole bytes.
89//! Bitstream writers are for writing signed and unsigned integer
90//! values to a stream, also potentially un-aligned at a whole byte.
91//!
92//! Both big-endian and little-endian streams are supported.
93//!
94//! The only requirement for wrapped reader streams is that they must
95//! implement the [`io::Read`] trait, and the only requirement
96//! for writer streams is that they must implement the [`io::Write`] trait.
97//!
98//! In addition, reader streams do not consume any more bytes
99//! from the underlying reader than necessary, buffering only a
100//! single partial byte as needed.
101//! Writer streams also write out all whole bytes as they are accumulated.
102//!
103//! Readers and writers are also designed to work with integer
104//! types of any possible size.
105//! Many of Rust's built-in integer types are supported by default.
106
107//! # Minimum Compiler Version
108//!
109//! Beginning with version 2.4, the minimum compiler version has been
110//! updated to Rust 1.79.
111//!
112//! The issue is that reading an excessive number of
113//! bits to a type which is too small to hold them,
114//! or writing an excessive number of bits from too small of a type,
115//! are always errors:
116//! ```
117//! use std::io::{Read, Cursor};
118//! use oximedia_bitstream::{BigEndian, BitReader, BitRead};
119//! let data = [0; 10];
120//! let mut r = BitReader::endian(Cursor::new(&data), BigEndian);
121//! let x: Result<u32, _> = r.read_var(64);  // reading 64 bits to u32 always fails at runtime
122//! assert!(x.is_err());
123//! ```
124//! but those errors will not be caught until the program runs,
125//! which is less than ideal for the common case in which
126//! the number of bits is already known at compile-time.
127//!
128//! But starting with Rust 1.79, we can now have read and write methods
129//! which take a constant number of bits and can validate the number of bits
130//! are small enough for the type being read/written at compile-time:
131//! ```rust,compile_fail
132//! use std::io::{Read, Cursor};
133//! use oximedia_bitstream::{BigEndian, BitReader, BitRead};
134//! let data = [0; 10];
135//! let mut r = BitReader::endian(Cursor::new(&data), BigEndian);
136//! let x: Result<u32, _> = r.read::<64, _>();  // doesn't compile at all
137//! ```
138//! Since catching potential bugs at compile-time is preferable
139//! to encountering errors at runtime, this will hopefully be
140//! an improvement in the long run.
141
142//! # Changes From 3.X.X
143//!
144//! Version 4.0.0 features significant optimizations to the [`BitRecorder`]
145//! and deprecates the [`BitCounter`] in favor of [`BitsWritten`],
146//! which no longer requires specifying an endianness.
147//!
148//! In addition, the [`BitRead::read_bytes`] and [`BitWrite::write_bytes`]
149//! methods are significantly optimized in the case of non-aligned
150//! reads and writes.
151//!
152//! Finally, the [`Endianness`] trait has been sealed so as not
153//! to be implemented by other packages.  Given that other endianness
154//! types are extremely rare in file formats and end users should not
155//! have to implement this trait themselves, this should not be a
156//! concern.
157//!
158//! # Changes From 2.X.X
159//!
160//! Version 3.0.0 has made many breaking changes to the [`BitRead`] and
161//! [`BitWrite`] traits.
162//!
163//! The [`BitRead::read`] method takes a constant number of bits,
164//! and the [`BitRead::read_var`] method takes a variable number of bits
165//! (reversing the older [`BitRead2::read_in`] and [`BitRead2::read`]
166//! calling methods to emphasize using the constant-based one,
167//! which can do more validation at compile-time).
168//! A new [`BitRead2`] trait uses the older calling convention
169//! for compatibility with existing code and is available
170//! for anything implementing [`BitRead`].
171//!
172//! In addition, the main reading methods return primitive types which
173//! implement a new [`Integer`] trait,
174//! which delegates to [`BitRead::read_unsigned`]
175//! or [`BitRead::read_signed`] depending on whether the output
176//! is an unsigned or signed type.
177//!
178//! [`BitWrite::write`] and [`BitWrite::write_var`] work
179//! similarly to the reader's `read` methods, taking anything
180//! that implements [`Integer`] and writing an unsigned or
181//! signed value to [`BitWrite::write_unsigned`] or
182//! [`BitWrite::write_signed`] as appropriate.
183//!
184//! And as with reading, a [`BitWrite2`] trait is offered
185//! for compatibility.
186//!
187//! In addition, the Huffman code handling has been rewritten
188//! to use a small amount of macro magic to write
189//! code to read and write symbols at compile-time.
190//! This is significantly faster than the older version
191//! and can no longer fail to compile at runtime.
192//!
193//! Lastly, there's a new [`BitCount`] struct which wraps a humble
194//! `u32` but encodes the maximum possible number of bits
195//! at the type level.
196//! This is intended for file formats which encode the number
197//! of bits to be read in the format itself.
198//! For example, FLAC's predictor coefficient precision
199//! is a 4 bit value which indicates how large each predictor
200//! coefficient is in bits
201//! (each coefficient might be an `i32` type).
202//! By keeping track of the maximum value at compile time
203//! (4 bits' worth, in this case), we can eliminate
204//! any need to check that coefficients aren't too large
205//! for an `i32` at runtime.
206//! This is accomplished by using [`BitRead::read_count`] to
207//! read a [`BitCount`] and then reading final values with
208//! that number of bits using [`BitRead::read_counted`].
209
210//! # Migrating From Pre 1.0.0
211//!
212//! There are now [`BitRead`] and [`BitWrite`] traits for bitstream
213//! reading and writing (analogous to the standard library's
214//! `Read` and `Write` traits) which you will also need to import.
215//! The upside to this approach is that library consumers
216//! can now make functions and methods generic over any sort
217//! of bit reader or bit writer, regardless of the underlying
218//! stream byte source or endianness.
219
220#![cfg_attr(docsrs, feature(doc_cfg))]
221#![warn(missing_docs)]
222#![forbid(unsafe_code)]
223
224// `PhantomData` is re-used through `super::PhantomData` by the `read` and
225// `write` submodules; keep the alias reachable from the crate root.
226pub(crate) use core::marker::PhantomData;
227use std::io;
228
229pub mod huffman;
230pub mod read;
231pub mod write;
232pub use read::{
233    BitRead, BitRead2, BitReader, ByteRead, ByteReader, FromBitStream, FromBitStreamUsing,
234    FromBitStreamWith, FromByteStream, FromByteStreamUsing, FromByteStreamWith,
235};
236#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
237#[cfg(feature = "alloc")]
238pub use write::BitRecorder;
239pub use write::{
240    BitWrite, BitWrite2, BitWriter, BitsWritten, ByteWrite, ByteWriter, ToBitStream,
241    ToBitStreamUsing, ToBitStreamWith, ToByteStream, ToByteStreamUsing, ToByteStreamWith,
242};
243
244#[allow(deprecated)]
245pub use write::BitCounter;
246
247// Split-out modules — the bitstream runtime surface is carved into
248// focused files so that no single source exceeds the COOLJAPAN 2 000-line
249// refactor guideline while preserving the original public API.
250mod big_endian;
251mod bitcount;
252mod checked;
253mod endian;
254mod integer;
255mod little_endian;
256
257pub use big_endian::{BigEndian, BE};
258pub use bitcount::{BitCount, SignedBitCount};
259pub use checked::{
260    Checkable, CheckablePrimitive, Checked, CheckedError, CheckedSigned, CheckedSignedFixed,
261    CheckedUnsigned, CheckedUnsignedFixed, FixedBitCount, FixedSignedBitCount,
262};
263pub use endian::Endianness;
264pub use integer::{Integer, Numeric, Primitive, SignedInteger, UnsignedInteger, VBRInteger};
265pub use little_endian::{LittleEndian, LE};
266
267mod private {
268    use crate::{
269        io, BitCount, BitRead, BitWrite, CheckedSigned, CheckedUnsigned, Primitive, SignedBitCount,
270        SignedInteger, UnsignedInteger,
271    };
272
273    #[test]
274    fn test_checked_signed() {
275        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<8>(), -128i8).is_ok());
276        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<8>(), 127i8).is_ok());
277        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), -64i8).is_ok());
278        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), 63i8).is_ok());
279        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), -32i8).is_ok());
280        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), 31i8).is_ok());
281        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), -16i8).is_ok());
282        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), 15i8).is_ok());
283        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), -8i8).is_ok());
284        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), 7i8).is_ok());
285        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), -4i8).is_ok());
286        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), 3i8).is_ok());
287        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), -2i8).is_ok());
288        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), 1i8).is_ok());
289        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), -1i8).is_ok());
290        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), 0i8).is_ok());
291
292        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), -65i8).is_err());
293        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), 64i8).is_err());
294        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), -33i8).is_err());
295        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), 32i8).is_err());
296        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), -17i8).is_err());
297        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), 16i8).is_err());
298        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), -9i8).is_err());
299        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), 8i8).is_err());
300        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), -5i8).is_err());
301        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), 4i8).is_err());
302        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), -3i8).is_err());
303        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), 2i8).is_err());
304        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), -2i8).is_err());
305        assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), 1i8).is_err());
306    }
307
308    pub trait Endianness: Sized {
309        /// Pops the next bit from the queue,
310        /// repleneshing it from the given reader if necessary
311        fn pop_bit_refill<R>(
312            reader: &mut R,
313            queue_value: &mut u8,
314            queue_bits: &mut u32,
315        ) -> io::Result<bool>
316        where
317            R: io::Read;
318
319        /// Pops the next unary value from the source until
320        /// `STOP_BIT` is encountered, replenishing it from the given
321        /// closure if necessary.
322        ///
323        /// `STOP_BIT` must be 0 or 1.
324        fn pop_unary<const STOP_BIT: u8, R>(
325            reader: &mut R,
326            queue_value: &mut u8,
327            queue_bits: &mut u32,
328        ) -> io::Result<u32>
329        where
330            R: io::Read;
331
332        /// Pushes the next bit into the queue,
333        /// and returns `Some` value if the queue is full.
334        fn push_bit_flush(queue_value: &mut u8, queue_bits: &mut u32, bit: bool) -> Option<u8>;
335
336        /// For performing bulk reads from a bit source to an output type.
337        fn read_bits<const MAX: u32, R, U>(
338            reader: &mut R,
339            queue_value: &mut u8,
340            queue_bits: &mut u32,
341            count: BitCount<MAX>,
342        ) -> io::Result<U>
343        where
344            R: io::Read,
345            U: UnsignedInteger;
346
347        /// For performing bulk reads from a bit source to an output type.
348        fn read_bits_fixed<const BITS: u32, R, U>(
349            reader: &mut R,
350            queue_value: &mut u8,
351            queue_bits: &mut u32,
352        ) -> io::Result<U>
353        where
354            R: io::Read,
355            U: UnsignedInteger;
356
357        /// For performing a checked write to a bit sink
358        fn write_bits_checked<const MAX: u32, W, U>(
359            writer: &mut W,
360            queue_value: &mut u8,
361            queue_bits: &mut u32,
362            value: CheckedUnsigned<MAX, U>,
363        ) -> io::Result<()>
364        where
365            W: io::Write,
366            U: UnsignedInteger;
367
368        /// For performing a checked signed write to a bit sink
369        fn write_signed_bits_checked<const MAX: u32, W, S>(
370            writer: &mut W,
371            queue_value: &mut u8,
372            queue_bits: &mut u32,
373            value: CheckedSigned<MAX, S>,
374        ) -> io::Result<()>
375        where
376            W: io::Write,
377            S: SignedInteger;
378
379        /// Reads signed value from reader in this endianness
380        fn read_signed_counted<const MAX: u32, R, S>(
381            r: &mut R,
382            bits: SignedBitCount<MAX>,
383        ) -> io::Result<S>
384        where
385            R: BitRead,
386            S: SignedInteger;
387
388        /// Reads whole set of bytes to output buffer
389        fn read_bytes<const CHUNK_SIZE: usize, R>(
390            reader: &mut R,
391            queue_value: &mut u8,
392            queue_bits: u32,
393            buf: &mut [u8],
394        ) -> io::Result<()>
395        where
396            R: io::Read;
397
398        /// Writes whole set of bytes to output buffer
399        fn write_bytes<const CHUNK_SIZE: usize, W>(
400            writer: &mut W,
401            queue_value: &mut u8,
402            queue_bits: u32,
403            buf: &[u8],
404        ) -> io::Result<()>
405        where
406            W: io::Write;
407
408        /// Converts a primitive's byte buffer to a primitive
409        fn bytes_to_primitive<P: Primitive>(buf: P::Bytes) -> P;
410
411        /// Converts a primitive to a primitive's byte buffer
412        fn primitive_to_bytes<P: Primitive>(p: P) -> P::Bytes;
413
414        /// Reads convertable numeric value from reader in this endianness
415        #[deprecated(since = "4.0.0")]
416        fn read_primitive<R, V>(r: &mut R) -> io::Result<V>
417        where
418            R: BitRead,
419            V: Primitive;
420
421        /// Writes convertable numeric value to writer in this endianness
422        #[deprecated(since = "4.0.0")]
423        fn write_primitive<W, V>(w: &mut W, value: V) -> io::Result<()>
424        where
425            W: BitWrite,
426            V: Primitive;
427    }
428
429    pub trait Checkable {
430        fn write_endian<E, W>(
431            self,
432            writer: &mut W,
433            queue_value: &mut u8,
434            queue_bits: &mut u32,
435        ) -> io::Result<()>
436        where
437            E: Endianness,
438            W: io::Write;
439    }
440}