oximedia_bitstream/lib.rs
1// Copyright 2017 Brian Langenberger
2// Copyright 2024-2026 COOLJAPAN OU (Team Kitasan)
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! Bit-level I/O for OxiMedia — a `std`-only fork of
11//! [`bitstream-io`](https://crates.io/crates/bitstream-io) 4.9.0.
12//!
13//! `oximedia-bitstream` provides traits and structs for reading and writing
14//! signed and unsigned integer values to streams that may not be aligned at
15//! a whole byte. Both big-endian and little-endian streams are supported.
16//!
17//! The crate is used internally by `oximedia-codec` for entropy coding
18//! (FLAC predictor coefficients, Vorbis header parsing, AV1 OBU parsing, etc.)
19//! and by `oximedia-container` for MP4 box I/O.
20//!
21//! # Core traits
22//!
23//! | Trait | Purpose |
24//! |-------|---------|
25//! | [`BitRead`] | Read bits from a stream, big- or little-endian |
26//! | [`BitWrite`] | Write bits to a stream, big- or little-endian |
27//! | [`ByteRead`] | Read whole bytes from a byte source |
28//! | [`ByteWrite`] | Write whole bytes to any destination |
29//! | [`FromBitStream`] | Deserialise a struct from a bit reader |
30//! | [`ToBitStream`] | Serialise a struct to a bit writer |
31//!
32//! # Concrete types
33//!
34//! - [`BitReader`] — wraps any `std::io::Read` and exposes bit-level reads
35//! - [`BitWriter`] — wraps any `std::io::Write` and exposes bit-level writes
36//! - [`ByteReader`] — wraps any `std::io::Read` for whole-byte reads
37//! - [`ByteWriter`] — wraps any `std::io::Write` for whole-byte writes
38//! - [`BitRecorder`] (feature `alloc`) — records bits written for later replay
39//! - [`BitsWritten`] — counts bits written without a backing writer
40//!
41//! Huffman coding helpers live in the [`huffman`] module via the
42//! [`FromBits`](huffman::FromBits) and [`ToBits`](huffman::ToBits) traits.
43//!
44//! # Quick start
45//!
46//! ```
47//! use std::io::Cursor;
48//! use oximedia_bitstream::{BigEndian, BitReader, BitRead};
49//!
50//! let data = [0b1011_0100u8, 0b1100_1010u8];
51//! let mut r = BitReader::endian(Cursor::new(&data), BigEndian);
52//!
53//! // Constant-bit-count read — validated at compile time (requires Rust 1.79+)
54//! let high: u8 = r.read::<4, _>().unwrap();
55//! assert_eq!(high, 0b1011);
56//!
57//! // Variable-bit-count read
58//! let low: u8 = r.read_var(4).unwrap();
59//! assert_eq!(low, 0b0100);
60//! ```
61//!
62//! # Endianness
63//!
64//! Pass [`BigEndian`] or [`LittleEndian`] as a zero-sized type parameter
65//! (or value) to [`BitReader::endian`] / [`BitWriter::endian`]. The endianness
66//! is a compile-time phantom; switching endianness mid-stream requires creating
67//! a new reader/writer around the same underlying stream.
68//!
69//! # Feature flags
70//!
71//! | Flag | Default | Effect |
72//! |------|---------|--------|
73//! | `std` | yes | Enables `alloc` |
74//! | `alloc` | via `std` | Enables [`BitRecorder`] |
75//!
76//! # Upstream attribution
77//!
78//! This crate is derived from
79//! [`bitstream-io`](https://crates.io/crates/bitstream-io) 4.9.0 by
80//! Brian Langenberger, licensed under Apache-2.0 / MIT.
81//! The OxiMedia fork removes the `core2` / `no_std` compatibility shim
82//! (OxiMedia targets `std` Rust only) and adapts the crate to the
83//! OxiMedia workspace conventions.
84
85//! # Traits and helpers for bitstream handling functionality
86//!
87//! Bitstream readers are for reading signed and unsigned integer
88//! values from a stream whose sizes may not be whole bytes.
89//! Bitstream writers are for writing signed and unsigned integer
90//! values to a stream, also potentially un-aligned at a whole byte.
91//!
92//! Both big-endian and little-endian streams are supported.
93//!
94//! The only requirement for wrapped reader streams is that they must
95//! implement the [`io::Read`] trait, and the only requirement
96//! for writer streams is that they must implement the [`io::Write`] trait.
97//!
98//! In addition, reader streams do not consume any more bytes
99//! from the underlying reader than necessary, buffering only a
100//! single partial byte as needed.
101//! Writer streams also write out all whole bytes as they are accumulated.
102//!
103//! Readers and writers are also designed to work with integer
104//! types of any possible size.
105//! Many of Rust's built-in integer types are supported by default.
106
107//! # Minimum Compiler Version
108//!
109//! Beginning with version 2.4, the minimum compiler version has been
110//! updated to Rust 1.79.
111//!
112//! The issue is that reading an excessive number of
113//! bits to a type which is too small to hold them,
114//! or writing an excessive number of bits from too small of a type,
115//! are always errors:
116//! ```
117//! use std::io::{Read, Cursor};
118//! use oximedia_bitstream::{BigEndian, BitReader, BitRead};
119//! let data = [0; 10];
120//! let mut r = BitReader::endian(Cursor::new(&data), BigEndian);
121//! let x: Result<u32, _> = r.read_var(64); // reading 64 bits to u32 always fails at runtime
122//! assert!(x.is_err());
123//! ```
124//! but those errors will not be caught until the program runs,
125//! which is less than ideal for the common case in which
126//! the number of bits is already known at compile-time.
127//!
128//! But starting with Rust 1.79, we can now have read and write methods
129//! which take a constant number of bits and can validate the number of bits
130//! are small enough for the type being read/written at compile-time:
131//! ```rust,compile_fail
132//! use std::io::{Read, Cursor};
133//! use oximedia_bitstream::{BigEndian, BitReader, BitRead};
134//! let data = [0; 10];
135//! let mut r = BitReader::endian(Cursor::new(&data), BigEndian);
136//! let x: Result<u32, _> = r.read::<64, _>(); // doesn't compile at all
137//! ```
138//! Since catching potential bugs at compile-time is preferable
139//! to encountering errors at runtime, this will hopefully be
140//! an improvement in the long run.
141
142//! # Changes From 3.X.X
143//!
144//! Version 4.0.0 features significant optimizations to the [`BitRecorder`]
145//! and deprecates the [`BitCounter`] in favor of [`BitsWritten`],
146//! which no longer requires specifying an endianness.
147//!
148//! In addition, the [`BitRead::read_bytes`] and [`BitWrite::write_bytes`]
149//! methods are significantly optimized in the case of non-aligned
150//! reads and writes.
151//!
//! Finally, the [`Endianness`] trait has been sealed so that it cannot
//! be implemented by other packages. Given that other endianness
//! types are extremely rare in file formats and end users should not
//! have to implement this trait themselves, this should not be a
//! concern.
157//!
158//! # Changes From 2.X.X
159//!
160//! Version 3.0.0 has made many breaking changes to the [`BitRead`] and
161//! [`BitWrite`] traits.
162//!
163//! The [`BitRead::read`] method takes a constant number of bits,
164//! and the [`BitRead::read_var`] method takes a variable number of bits
165//! (reversing the older [`BitRead2::read_in`] and [`BitRead2::read`]
166//! calling methods to emphasize using the constant-based one,
167//! which can do more validation at compile-time).
168//! A new [`BitRead2`] trait uses the older calling convention
169//! for compatibility with existing code and is available
170//! for anything implementing [`BitRead`].
171//!
172//! In addition, the main reading methods return primitive types which
173//! implement a new [`Integer`] trait,
174//! which delegates to [`BitRead::read_unsigned`]
175//! or [`BitRead::read_signed`] depending on whether the output
176//! is an unsigned or signed type.
177//!
178//! [`BitWrite::write`] and [`BitWrite::write_var`] work
179//! similarly to the reader's `read` methods, taking anything
180//! that implements [`Integer`] and writing an unsigned or
181//! signed value to [`BitWrite::write_unsigned`] or
182//! [`BitWrite::write_signed`] as appropriate.
183//!
184//! And as with reading, a [`BitWrite2`] trait is offered
185//! for compatibility.
186//!
//! In addition, the Huffman code handling has been rewritten
//! to use a small amount of macro magic to generate the
//! code that reads and writes symbols at compile-time.
//! This is significantly faster than the older version,
//! and building a Huffman table can no longer fail at runtime.
192//!
193//! Lastly, there's a new [`BitCount`] struct which wraps a humble
194//! `u32` but encodes the maximum possible number of bits
195//! at the type level.
196//! This is intended for file formats which encode the number
197//! of bits to be read in the format itself.
198//! For example, FLAC's predictor coefficient precision
199//! is a 4 bit value which indicates how large each predictor
200//! coefficient is in bits
201//! (each coefficient might be an `i32` type).
202//! By keeping track of the maximum value at compile time
203//! (4 bits' worth, in this case), we can eliminate
204//! any need to check that coefficients aren't too large
205//! for an `i32` at runtime.
206//! This is accomplished by using [`BitRead::read_count`] to
207//! read a [`BitCount`] and then reading final values with
208//! that number of bits using [`BitRead::read_counted`].
209
210//! # Migrating From Pre 1.0.0
211//!
212//! There are now [`BitRead`] and [`BitWrite`] traits for bitstream
213//! reading and writing (analogous to the standard library's
214//! `Read` and `Write` traits) which you will also need to import.
215//! The upside to this approach is that library consumers
216//! can now make functions and methods generic over any sort
217//! of bit reader or bit writer, regardless of the underlying
218//! stream byte source or endianness.
219
220#![cfg_attr(docsrs, feature(doc_cfg))]
221#![warn(missing_docs)]
222#![forbid(unsafe_code)]
223
224// `PhantomData` is re-used through `super::PhantomData` by the `read` and
225// `write` submodules; keep the alias reachable from the crate root.
226pub(crate) use core::marker::PhantomData;
227use std::io;
228
229pub mod huffman;
230pub mod read;
231pub mod write;
232pub use read::{
233 BitRead, BitRead2, BitReader, ByteRead, ByteReader, FromBitStream, FromBitStreamUsing,
234 FromBitStreamWith, FromByteStream, FromByteStreamUsing, FromByteStreamWith,
235};
236#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
237#[cfg(feature = "alloc")]
238pub use write::BitRecorder;
239pub use write::{
240 BitWrite, BitWrite2, BitWriter, BitsWritten, ByteWrite, ByteWriter, ToBitStream,
241 ToBitStreamUsing, ToBitStreamWith, ToByteStream, ToByteStreamUsing, ToByteStreamWith,
242};
243
244#[allow(deprecated)]
245pub use write::BitCounter;
246
247// Split-out modules — the bitstream runtime surface is carved into
248// focused files so that no single source exceeds the COOLJAPAN 2 000-line
249// refactor guideline while preserving the original public API.
250mod big_endian;
251mod bitcount;
252mod checked;
253mod endian;
254mod integer;
255mod little_endian;
256
257pub use big_endian::{BigEndian, BE};
258pub use bitcount::{BitCount, SignedBitCount};
259pub use checked::{
260 Checkable, CheckablePrimitive, Checked, CheckedError, CheckedSigned, CheckedSignedFixed,
261 CheckedUnsigned, CheckedUnsignedFixed, FixedBitCount, FixedSignedBitCount,
262};
263pub use endian::Endianness;
264pub use integer::{Integer, Numeric, Primitive, SignedInteger, UnsignedInteger, VBRInteger};
265pub use little_endian::{LittleEndian, LE};
266
267mod private {
268 use crate::{
269 io, BitCount, BitRead, BitWrite, CheckedSigned, CheckedUnsigned, Primitive, SignedBitCount,
270 SignedInteger, UnsignedInteger,
271 };
272
273 #[test]
274 fn test_checked_signed() {
275 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<8>(), -128i8).is_ok());
276 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<8>(), 127i8).is_ok());
277 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), -64i8).is_ok());
278 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), 63i8).is_ok());
279 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), -32i8).is_ok());
280 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), 31i8).is_ok());
281 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), -16i8).is_ok());
282 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), 15i8).is_ok());
283 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), -8i8).is_ok());
284 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), 7i8).is_ok());
285 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), -4i8).is_ok());
286 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), 3i8).is_ok());
287 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), -2i8).is_ok());
288 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), 1i8).is_ok());
289 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), -1i8).is_ok());
290 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), 0i8).is_ok());
291
292 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), -65i8).is_err());
293 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<7>(), 64i8).is_err());
294 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), -33i8).is_err());
295 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<6>(), 32i8).is_err());
296 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), -17i8).is_err());
297 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<5>(), 16i8).is_err());
298 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), -9i8).is_err());
299 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<4>(), 8i8).is_err());
300 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), -5i8).is_err());
301 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<3>(), 4i8).is_err());
302 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), -3i8).is_err());
303 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<2>(), 2i8).is_err());
304 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), -2i8).is_err());
305 assert!(CheckedSigned::new(SignedBitCount::<8>::new::<1>(), 1i8).is_err());
306 }
307
308 pub trait Endianness: Sized {
309 /// Pops the next bit from the queue,
310 /// repleneshing it from the given reader if necessary
311 fn pop_bit_refill<R>(
312 reader: &mut R,
313 queue_value: &mut u8,
314 queue_bits: &mut u32,
315 ) -> io::Result<bool>
316 where
317 R: io::Read;
318
319 /// Pops the next unary value from the source until
320 /// `STOP_BIT` is encountered, replenishing it from the given
321 /// closure if necessary.
322 ///
323 /// `STOP_BIT` must be 0 or 1.
324 fn pop_unary<const STOP_BIT: u8, R>(
325 reader: &mut R,
326 queue_value: &mut u8,
327 queue_bits: &mut u32,
328 ) -> io::Result<u32>
329 where
330 R: io::Read;
331
332 /// Pushes the next bit into the queue,
333 /// and returns `Some` value if the queue is full.
334 fn push_bit_flush(queue_value: &mut u8, queue_bits: &mut u32, bit: bool) -> Option<u8>;
335
336 /// For performing bulk reads from a bit source to an output type.
337 fn read_bits<const MAX: u32, R, U>(
338 reader: &mut R,
339 queue_value: &mut u8,
340 queue_bits: &mut u32,
341 count: BitCount<MAX>,
342 ) -> io::Result<U>
343 where
344 R: io::Read,
345 U: UnsignedInteger;
346
347 /// For performing bulk reads from a bit source to an output type.
348 fn read_bits_fixed<const BITS: u32, R, U>(
349 reader: &mut R,
350 queue_value: &mut u8,
351 queue_bits: &mut u32,
352 ) -> io::Result<U>
353 where
354 R: io::Read,
355 U: UnsignedInteger;
356
357 /// For performing a checked write to a bit sink
358 fn write_bits_checked<const MAX: u32, W, U>(
359 writer: &mut W,
360 queue_value: &mut u8,
361 queue_bits: &mut u32,
362 value: CheckedUnsigned<MAX, U>,
363 ) -> io::Result<()>
364 where
365 W: io::Write,
366 U: UnsignedInteger;
367
368 /// For performing a checked signed write to a bit sink
369 fn write_signed_bits_checked<const MAX: u32, W, S>(
370 writer: &mut W,
371 queue_value: &mut u8,
372 queue_bits: &mut u32,
373 value: CheckedSigned<MAX, S>,
374 ) -> io::Result<()>
375 where
376 W: io::Write,
377 S: SignedInteger;
378
379 /// Reads signed value from reader in this endianness
380 fn read_signed_counted<const MAX: u32, R, S>(
381 r: &mut R,
382 bits: SignedBitCount<MAX>,
383 ) -> io::Result<S>
384 where
385 R: BitRead,
386 S: SignedInteger;
387
388 /// Reads whole set of bytes to output buffer
389 fn read_bytes<const CHUNK_SIZE: usize, R>(
390 reader: &mut R,
391 queue_value: &mut u8,
392 queue_bits: u32,
393 buf: &mut [u8],
394 ) -> io::Result<()>
395 where
396 R: io::Read;
397
398 /// Writes whole set of bytes to output buffer
399 fn write_bytes<const CHUNK_SIZE: usize, W>(
400 writer: &mut W,
401 queue_value: &mut u8,
402 queue_bits: u32,
403 buf: &[u8],
404 ) -> io::Result<()>
405 where
406 W: io::Write;
407
408 /// Converts a primitive's byte buffer to a primitive
409 fn bytes_to_primitive<P: Primitive>(buf: P::Bytes) -> P;
410
411 /// Converts a primitive to a primitive's byte buffer
412 fn primitive_to_bytes<P: Primitive>(p: P) -> P::Bytes;
413
414 /// Reads convertable numeric value from reader in this endianness
415 #[deprecated(since = "4.0.0")]
416 fn read_primitive<R, V>(r: &mut R) -> io::Result<V>
417 where
418 R: BitRead,
419 V: Primitive;
420
421 /// Writes convertable numeric value to writer in this endianness
422 #[deprecated(since = "4.0.0")]
423 fn write_primitive<W, V>(w: &mut W, value: V) -> io::Result<()>
424 where
425 W: BitWrite,
426 V: Primitive;
427 }
428
429 pub trait Checkable {
430 fn write_endian<E, W>(
431 self,
432 writer: &mut W,
433 queue_value: &mut u8,
434 queue_bits: &mut u32,
435 ) -> io::Result<()>
436 where
437 E: Endianness,
438 W: io::Write;
439 }
440}