dsi_bitstream/codes/mod.rs
1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9//! Traits for reading and writing instantaneous codes.
10//!
11//! This module contains code for reading and writing instantaneous codes.
12//! Codewords are uniformly indexed from 0 for all codes. For example, the first
13//! few words of [unary](crate::traits::BitRead::read_unary), [γ](gamma), and
14//! [δ](delta) codes are:
15//!
16//! | Arg | unary | γ | δ |
17//! |-----|---------:|--------:|---------:|
18//! | 0 | 1 | 1 | 1 |
19//! | 1 | 01 | 010 | 0100 |
20//! | 2 | 001 | 011 | 0101 |
21//! | 3 | 0001 | 00100 | 01100 |
22//! | 4 | 00001 | 00101 | 01101 |
23//! | 5 | 000001 | 00110 | 01110 |
24//! | 6 | 0000001 | 00111 | 01111 |
25//! | 7 | 00000001 | 0001000 | 00100000 |
26//!
27//! If you need to encode signed integers, please use the [`ToInt`] and
28//! [`ToNat`] traits, which provide a bijection between signed integers and
29//! natural numbers.
30//!
31//! Each code is implemented as a pair of traits for reading and writing (e.g.,
32//! [`GammaReadParam`](gamma::GammaReadParam) and
33//! [`GammaWriteParam`](gamma::GammaWriteParam)). The traits for reading depend
34//! on [`BitRead`](crate::traits::BitRead), whereas the traits for writing
35//! depend on [`BitWrite`](crate::traits::BitWrite). Note that most codes cannot
36//! write the number [`u64::MAX`] because of overflow issues, which could be
37//! avoided with tests, but at the price of a significant performance drop.
38//!
39//! The traits ending with `Param` make it possible to specify parameters—for
40//! example, whether to use decoding tables. Usually, one would instead pull
41//! into scope non-parametric traits such as [`GammaRead`] and [`GammaWrite`],
42//! for which defaults are provided using the mechanism described in the
43//! [`params`] module.
44//!
45//! # Big-endian vs. little-endian
46//!
47//! As discussed in the [traits module](crate::traits), in general reversing the
48//! bits of a big-endian bit stream will not yield a little-endian bit stream
49//! containing the same sequence of fixed-width integers. The same is true for
50//! codes, albeit the situation is more complex.
51//!
52//! The only code that can be safely reversed is the unary code. All other codes
53//! contain some value, and that value is written without reversing its bits.
54//! Thus, reversing the bits of a big-endian bit stream containing a sequence of
55//! instantaneous codes will not yield a little-endian bit stream containing the
56//! same sequence of codes (again, with the exception of unary codes).
57//! Technically, the codes written for the little-endian case are different from
58//! those written for the big-endian case.
59//!
60//! For example, the [γ code](gamma) of 4 is `00101` in big-endian order, but it
61//! is `01100` in little-endian order, so that upon reading the unary code for 2
62//! we can read the `01` part without a bit reversal.
63//!
64//! The case of [minimal binary codes](minimal_binary) is even more convoluted:
65//! for example, the code with upper bound 7 has codewords `00`, `010`, `011`,
66//! `100`, `101`, `110`, and `111`. To decode such a code without peeking at
67//! more bits than necessary, one first reads two bits, and then decides, based
68//! on their value, whether to read a further bit and add it on the right. But
69//! this means that we have to encode 2 as `011` in the big-endian case, and as
70//! `101` in the little-endian case, because we need to read the first two bits
71//! to decide whether to read the third one.
72//!
73//! In some cases, we resort to completely *ad hoc* solutions: for example, in
74//! the case of the [ω code](omega), for the little-endian case instead of
75//! reversing the bits written at each recursive call (which in principle would
76//! be necessary), we simply rotate them to the left by one position, exposing
77//! the most significant bit as first bit. This is sufficient to make the
78//! decoding possible, and the rotation is a much faster operation than bit
79//! reversal.
80//!
81//! # Dispatch
82//!
83//! The basic method for accessing codes is through traits like
84//! [`GammaRead`] and [`GammaWrite`]. This approach, however, forces a choice of code in the
85//! source. To pass a choice of code dynamically, please have a look at the
86//! [`dispatch`](crate::dispatch) module.
87
88use num_primitive::{PrimitiveNumberAs, PrimitiveSigned, PrimitiveUnsigned};
89
90pub mod params;
91
92pub mod gamma;
93pub use gamma::{GammaRead, GammaWrite, len_gamma};
94
95pub mod delta;
96pub use delta::{DeltaRead, DeltaWrite, len_delta};
97
98pub mod omega;
99pub use omega::{OmegaRead, OmegaWrite, len_omega};
100
101pub mod minimal_binary;
102pub use minimal_binary::{MinimalBinaryRead, MinimalBinaryWrite, len_minimal_binary};
103
104pub mod zeta;
105pub use zeta::{ZetaRead, ZetaWrite, len_zeta};
106
107pub mod pi;
108pub use pi::{PiRead, PiWrite, len_pi};
109
110pub mod golomb;
111pub use golomb::{GolombRead, GolombWrite, len_golomb};
112
113pub mod rice;
114pub use rice::{RiceRead, RiceWrite, len_rice};
115
116pub mod exp_golomb;
117pub use exp_golomb::{ExpGolombRead, ExpGolombWrite, len_exp_golomb};
118
119pub mod vbyte;
120pub use vbyte::{
121 VByteBeRead, VByteBeWrite, VByteLeRead, VByteLeWrite, bit_len_vbyte, byte_len_vbyte,
122};
123#[cfg(feature = "std")]
124pub use vbyte::{
125 vbyte_read, vbyte_read_be, vbyte_read_le, vbyte_write, vbyte_write_be, vbyte_write_le,
126};
127
128pub mod delta_tables;
129pub mod gamma_tables;
130pub mod omega_tables;
131pub mod pi_tables;
132pub mod zeta_tables;
133
134/// Extension trait mapping natural numbers bijectively to integers.
135///
136/// The method [`to_int`](#method.to_int) will map a natural number `x` to `x
137/// / 2` if `x` is even, and to `−(x + 1) / 2` if `x` is odd. The inverse
138/// transformation is provided by the [`ToNat`] trait.
139///
140/// This pair of bijections makes it possible to use instantaneous codes for
141/// signed integers by mapping them to natural numbers and back.
142///
143/// This bijection is best known as the “ZigZag” transformation in Google's
144/// [Protocol Buffers](https://protobuf.dev/), albeit it has been used by
145/// [WebGraph](http://webgraph.di.unimi.it/) since 2003, and most likely in
146/// other software, for the same purpose. Note that the compression standards
147/// H.264/H.265 use a different transformation for exponential Golomb codes,
148/// mapping a positive integer `x` to `2x − 1` and a zero or negative integer
149/// `x` to `−2x`.
150///
151/// The implementation uses a blanket implementation for all primitive
152/// unsigned integer types.
153pub trait ToInt {
154 type Signed;
155 #[must_use]
156 fn to_int(self) -> Self::Signed;
157}
158
159impl<U: PrimitiveUnsigned + PrimitiveNumberAs<U::Signed>> ToInt for U
160where
161 U::Signed: PrimitiveSigned,
162{
163 type Signed = U::Signed;
164 #[inline]
165 fn to_int(self) -> U::Signed {
166 (self >> 1u32).as_to::<U::Signed>() ^ -((self & U::from(1u8)).as_to::<U::Signed>())
167 }
168}
169
170/// Extension trait mapping signed integers bijectively to natural numbers.
171///
172/// The method [`to_nat`](#method.to_nat) will map a nonnegative integer `x`
173/// to `2x` and a negative integer `x` to `−2x − 1`. The inverse transformation
174/// is provided by the [`ToInt`] trait.
175///
176/// This pair of bijections makes it possible to use instantaneous codes
177/// for signed integers by mapping them to natural numbers and back.
178///
179/// This bijection is best known as the “ZigZag” transformation in Google's
180/// [Protocol Buffers](https://protobuf.dev/), albeit it has been used by
181/// [WebGraph](http://webgraph.di.unimi.it/) since 2003, and most likely in
182/// other software, for the same purpose. Note that the compression standards
183/// H.264/H.265 use a different transformation for exponential Golomb codes,
184/// mapping a positive integer `x` to `2x − 1` and a zero or negative integer
185/// `x` to `−2x`.
186///
187/// The implementation uses a blanket implementation for all primitive
188/// signed integer types.
189pub trait ToNat {
190 type Unsigned;
191 #[must_use]
192 fn to_nat(self) -> Self::Unsigned;
193}
194
195impl<S: PrimitiveSigned + PrimitiveNumberAs<S::Unsigned>> ToNat for S
196where
197 S::Unsigned: PrimitiveUnsigned,
198{
199 type Unsigned = S::Unsigned;
200 #[inline]
201 fn to_nat(self) -> S::Unsigned {
202 (self << 1u32).as_to::<S::Unsigned>() ^ (self >> (S::BITS - 1)).as_to::<S::Unsigned>()
203 }
204}