dsi_bitstream/codes/mod.rs
1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9//! Traits for reading and writing instantaneous codes.
10//!
11//! This module contains code for reading and writing instantaneous codes.
12//! Codewords are uniformly indexed from 0 for all codes. For example, the first
13//! few words of [unary](crate::traits::BitRead::read_unary), [γ](gamma), and
14//! [δ](delta) codes are:
15//!
16//! | Arg | unary | γ | δ |
17//! |-----|---------:|--------:|---------:|
18//! | 0 | 1 | 1 | 1 |
19//! | 1 | 01 | 010 | 0100 |
20//! | 2 | 001 | 011 | 0101 |
21//! | 3 | 0001 | 00100 | 01100 |
22//! | 4 | 00001 | 00101 | 01101 |
23//! | 5 | 000001 | 00110 | 01110 |
24//! | 6 | 0000001 | 00111 | 01111 |
25//! | 7 | 00000001 | 0001000 | 00100000 |
26//!
27//! If you need to encode signed integers, please use the [`ToInt`] and
28//! [`ToNat`] traits, which provide a bijection between signed integers and
29//! natural numbers.
30//!
31//! Each code is implemented as a pair of traits for reading and writing (e.g.,
32//! [`GammaReadParam`](gamma::GammaReadParam) and
33//! [`GammaWriteParam`](gamma::GammaWriteParam)). The traits for reading depend
34//! on [`BitRead`](crate::traits::BitRead), whereas the traits for writing
35//! depend on [`BitWrite`](crate::traits::BitWrite). Note that most codes cannot
36//! write the number [`u64::MAX`] because of overflow issues, which could be
37//! avoided with tests, but at the price of a significant performance drop.
38//!
39//! The traits ending with `Param` make it possible to specify parameters—for
40//! example, whether to use decoding tables. Usually, one would instead pull
41//! into scope non-parametric traits such as [`GammaRead`] and [`GammaWrite`],
42//! for which defaults are provided using the mechanism described in the
43//! [`params`] module.
44//!
45//! # Big-endian vs. little-endian
46//!
47//! As discussed in the [traits module](crate::traits), in general reversing the
48//! bits of a big-endian bit stream will not yield a little-endian bit stream
49//! containing the same sequence of fixed-width integers. The same is true for
50//! codes, albeit the situation is more complex.
51//!
52//! The only code that can be safely reversed is the unary code. All other codes
53//! contain some value, and that value is written without reversing its bits.
54//! Thus, reversing the bits of a big-endian bit stream containing a sequence of
55//! instantaneous codes will not yield a little-endian bit stream containing the
56//! same sequence of codes (again, with the exception of unary codes).
57//! Technically, the codes written for the little-endian case are different from
58//! those written for the big-endian case.
59//!
60//! For example, the [γ code](gamma) of 4 is `00101` in big-endian order, but it
61//! is `01100` in little-endian order, so that upon reading the unary code for 2
62//! we can read the `01` part without a bit reversal.
63//!
64//! The case of [minimal binary codes](minimal_binary) is even more convoluted:
65//! for example, the code with upper bound 7 has codewords `00`, `010`, `011`,
66//! `100`, `101`, `110`, and `111`. To decode such a code without peeking at
67//! more bits than necessary, one first reads two bits, and then decides, based
68//! on their value, whether to read a further bit and add it on the right. But
69//! this means that we have to encode 2 as `011` in the big-endian case, and as
70//! `101` in the little-endian case, because we need to read the first two bits
71//! to decide whether to read the third one.
72//!
73//! In some cases, we resort to completely *ad hoc* solutions: for example, in
74//! the case of the [ω code](omega), for the little-endian case instead of
75//! reversing the bits written at each recursive call (which in principle would
76//! be necessary), we simply rotate them to the left by one position, exposing
77//! the most significant bit as first bit. This is sufficient to make the
78//! decoding possible, and the rotation is a much faster operation than bit
79//! reversal.
80//!
81//! # Dispatch
82//!
83//! The basic method for accessing codes is through traits like
84//! [`GammaRead`] and [`GammaWrite`]. This approach, however, forces a choice of code in the
85//! source. To pass a choice of code dynamically, please have a look at the
86//! [`dispatch`](crate::dispatch) module.
87
88use num_primitive::{PrimitiveSigned, PrimitiveUnsigned};
89use num_traits::{AsPrimitive, ConstOne};
90
91pub mod params;
92
93pub mod gamma;
94pub use gamma::{GammaRead, GammaWrite, len_gamma};
95
96pub mod delta;
97pub use delta::{DeltaRead, DeltaWrite, len_delta};
98
99pub mod omega;
100pub use omega::{OmegaRead, OmegaWrite, len_omega};
101
102pub mod minimal_binary;
103pub use minimal_binary::{MinimalBinaryRead, MinimalBinaryWrite, len_minimal_binary};
104
105pub mod zeta;
106pub use zeta::{ZetaRead, ZetaWrite, len_zeta};
107
108pub mod pi;
109pub use pi::{PiRead, PiWrite, len_pi};
110
111pub mod golomb;
112pub use golomb::{GolombRead, GolombWrite, len_golomb};
113
114pub mod rice;
115pub use rice::{RiceRead, RiceWrite, len_rice};
116
117pub mod exp_golomb;
118pub use exp_golomb::{ExpGolombRead, ExpGolombWrite, len_exp_golomb};
119
120pub mod vbyte;
121pub use vbyte::{
122 VByteBeRead, VByteBeWrite, VByteLeRead, VByteLeWrite, bit_len_vbyte, byte_len_vbyte,
123};
124#[cfg(feature = "std")]
125pub use vbyte::{
126 vbyte_read, vbyte_read_be, vbyte_read_le, vbyte_write, vbyte_write_be, vbyte_write_le,
127};
128
129pub mod delta_tables;
130pub mod gamma_tables;
131pub mod omega_tables;
132pub mod pi_tables;
133pub mod zeta_tables;
134
135/// Extension trait mapping natural numbers bijectively to integers.
136///
137/// The method [`to_int`](#method.to_int) will map a natural number `x` to `x
138/// / 2` if `x` is even, and to `−(x + 1) / 2` if `x` is odd. The inverse
139/// transformation is provided by the [`ToNat`] trait.
140///
141/// This pair of bijections makes it possible to use instantaneous codes for
142/// signed integers by mapping them to natural numbers and back.
143///
144/// This bijection is best known as the “ZigZag” transformation in Google's
145/// [Protocol Buffers](https://protobuf.dev/), albeit it has been used by
146/// [WebGraph](http://webgraph.di.unimi.it/) since 2003, and most likely in
147/// other software, for the same purpose. Note that the compression standards
148/// H.264/H.265 use a different transformation for exponential Golomb codes,
149/// mapping a positive integer `x` to `2x − 1` and a zero or negative integer
150/// `x` to `−2x`.
151///
152/// The implementation uses a blanket implementation for all primitive
153/// unsigned integer types.
154pub trait ToInt {
155 type Signed;
156 #[must_use]
157 fn to_int(self) -> Self::Signed;
158}
159
160impl<U: PrimitiveUnsigned + ConstOne + AsPrimitive<U::Signed>> ToInt for U
161where
162 U::Signed: PrimitiveSigned + Copy + 'static,
163{
164 type Signed = U::Signed;
165 #[inline]
166 fn to_int(self) -> U::Signed {
167 (self >> 1u32).as_() ^ -((self & U::ONE).as_())
168 }
169}
170
171/// Extension trait mapping signed integers bijectively to natural numbers.
172///
173/// The method [`to_nat`](#method.to_nat) will map a nonnegative integer `x`
174/// to `2x` and a negative integer `x` to `−2x − 1`. The inverse transformation
175/// is provided by the [`ToInt`] trait.
176///
177/// This pair of bijections makes it possible to use instantaneous codes
178/// for signed integers by mapping them to natural numbers and back.
179///
180/// This bijection is best known as the “ZigZag” transformation in Google's
181/// [Protocol Buffers](https://protobuf.dev/), albeit it has been used by
182/// [WebGraph](http://webgraph.di.unimi.it/) since 2003, and most likely in
183/// other software, for the same purpose. Note that the compression standards
184/// H.264/H.265 use a different transformation for exponential Golomb codes,
185/// mapping a positive integer `x` to `2x − 1` and a zero or negative integer
186/// `x` to `−2x`.
187///
188/// The implementation uses a blanket implementation for all primitive
189/// signed integer types.
190pub trait ToNat {
191 type Unsigned;
192 #[must_use]
193 fn to_nat(self) -> Self::Unsigned;
194}
195
196impl<S: PrimitiveSigned + AsPrimitive<S::Unsigned>> ToNat for S
197where
198 S::Unsigned: PrimitiveUnsigned + Copy + 'static,
199{
200 type Unsigned = S::Unsigned;
201 #[inline]
202 fn to_nat(self) -> S::Unsigned {
203 (self << 1u32).as_() ^ (self >> (S::BITS - 1)).as_()
204 }
205}