dsi_bitstream/codes/omega.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Elias ω code.
9//!
10//! Elias [γ](super::gamma) and [δ](super::delta) codes encode a number *n* by
11//! storing the binary representation of *n* + 1, with the most significant bit
12//! removed, prefixed by its length in unary or [γ](super::gamma) code,
13//! respectively. Thus, [δ](super::delta) can be seen as adding one level of
14//! recursion in the length representation with respect to [γ](super::gamma).
15//! The ω code encodes the length of the binary representation of *n* + 1
16//! recursively.
17//!
18//! The implied distribution for the ω code is difficult to write analytically,
19//! but essentially it is as close as possible to ≈ 1/*x* (as there is no code
20//! for that distribution).
21//!
22//! The supported range is [0 . . 2⁶⁴ – 1).
23//!
24//! The ω code is easier to describe the format of a code, rather than the
25//! encoding algorithm.
26//!
27//! A codeword is given by the concatenation of blocks *b*₀ *b*₁ … *b*ₙ `0`,
28//! where each block *b*ᵢ is a binary string starting with `1` and *b*₀ = `10`
29//! or `11`. One can interpret the highest bit of each block as a continuation
30//! bit, and the last `0` as a terminator of the code.
31//!
32//! The condition for a valid codeword is that the value represented by each
33//! block, incremented by one, is the length of the following block, except for
34//! the last block.
35//!
36//! The value associated with a codeword is 0 if the code is `0`, and otherwise
37//! the value of the last block, decremented by one.
38//!
39//! For example, `1110110`, which is formed by the blocks `11`, `1011`, and `0`,
40//! represents the number 10.
41//!
42//! As discussed in the [codes module documentation](crate::codes), to make the
43//! code readable in the little-endian case, rather than reversing the bits of
44//! the blocks, which would be expensive, we simply rotate by one on the left
45//! each block, with the result that the most significant bit of the block is
46//! now the first bit in the stream, making it possible to check for the
47//! presence of a continuation bit. For example, in the little-endian case, the
48//! code for 10 is `0011111`, which is formed by the blocks `11`, `0111`, and
49//! `0`.
50//!
51//! # Table-Based Optimization
52//!
53//! Unlike [γ](super::gamma), [δ](super::delta), and [ζ](super::zeta) codes, ω
54//! codes use a special optimization for partial decoding. Due to the recursive
55//! nature of ω codes, when a complete codeword cannot be read from the table
56//! the table still provides partial information about the blocks that were
57//! successfully decoded. This partial state is used to continue decoding
58//! efficiently, avoiding re-reading the initial blocks.
59//!
60//! # References
61//!
62//! Peter Elias. “[Universal codeword sets and representations of the
63//! integers](https://doi.org/10.1109/TIT.1975.1055349)”. IEEE Transactions on
64//! Information Theory, 21(2):194−203, March 1975.
65
66use crate::{codes::omega_tables, prelude::*};
67use common_traits::CastableInto;
68
69/// Returns the length of the ω code for `n`.
70#[inline(always)]
71pub fn len_omega_param<const USE_TABLE: bool>(n: u64) -> usize {
72 debug_assert!(n < u64::MAX);
73 if USE_TABLE {
74 if let Some(len) = omega_tables::LEN.get(n as usize) {
75 return *len as usize;
76 }
77 }
78 recursive_len(n + 1)
79}
80
81/// Returns the length of the ω code for `n`.
82#[inline(always)]
83pub fn len_omega(n: u64) -> usize {
84 debug_assert!(n < u64::MAX);
85 len_omega_param::<true>(n)
86}
87
88fn recursive_len(n: u64) -> usize {
89 if n <= 1 {
90 return 1;
91 }
92 let λ = n.ilog2() as u64;
93 recursive_len(λ) + λ as usize + 1
94}
95
96/// Trait for reading ω codes.
97///
98/// This is the trait you should usually pull in scope to read ω codes.
99pub trait OmegaRead<E: Endianness>: BitRead<E> {
100 fn read_omega(&mut self) -> Result<u64, Self::Error>;
101}
102
103/// Parametric trait for reading ω codes.
104///
105/// This trait is is more general than [`OmegaRead`], as it makes it possible
106/// to specify how to use tables using const parameters.
107///
108/// We provide an implementation of this trait for [`BitRead`]. An implementation
109/// of [`OmegaRead`] using default values is usually provided exploiting the
110/// [`crate::codes::params::ReadParams`] mechanism.
111pub trait OmegaReadParam<E: Endianness>: BitRead<E> {
112 fn read_omega_param<const USE_TABLES: bool>(&mut self) -> Result<u64, Self::Error>;
113}
114
115/// Default, internal non-table based implementation that works
116/// for any endianness.
117#[inline(always)]
118fn default_read_omega<E: Endianness, B: BitRead<E>>(backend: &mut B) -> Result<u64, B::Error> {
119 read_omega_from_state::<E, B>(backend, 1)
120}
121
122/// Internal implementation that continues reading from a given state.
123///
124/// This is used both by the default implementation (starting from state n=1)
125/// and by the table-accelerated version (continuing from partial state).
126/// The bits have already been skipped by the caller.
127#[inline(always)]
128fn read_omega_from_state<E: Endianness, B: BitRead<E>>(
129 backend: &mut B,
130 mut n: u64,
131) -> Result<u64, B::Error> {
132 loop {
133 let bit = backend.peek_bits(1)?.cast();
134 if bit == 0 {
135 backend.skip_bits_after_peek(1);
136 return Ok(n - 1);
137 }
138
139 let λ = n;
140 n = backend.read_bits(λ as usize + 1)?;
141
142 if E::IS_LITTLE {
143 // Little-endian case: rotate right the lower λ + 1 bits (the lowest
144 // bit is a one) to reverse the rotation performed when writing
145 n = (n >> 1) | (1 << λ);
146 }
147 }
148}
149
150impl<B: BitRead<BE>> OmegaReadParam<BE> for B {
151 #[inline(always)]
152 fn read_omega_param<const USE_TABLES: bool>(&mut self) -> Result<u64, Self::Error> {
153 if USE_TABLES {
154 let (len_with_flag, value) = omega_tables::read_table_be(self);
155 if (len_with_flag & 0x80) == 0 {
156 // Complete code - bits already skipped in read_table
157 return Ok(value);
158 } else {
159 // Partial code - bits already skipped in read_table, continue from partial_n
160 return read_omega_from_state::<BE, _>(self, value);
161 }
162 }
163 default_read_omega(self)
164 }
165}
166
167impl<B: BitRead<LE>> OmegaReadParam<LE> for B {
168 #[inline(always)]
169 fn read_omega_param<const USE_TABLES: bool>(&mut self) -> Result<u64, Self::Error> {
170 if USE_TABLES {
171 let (len_with_flag, value) = omega_tables::read_table_le(self);
172 if (len_with_flag & 0x80) == 0 {
173 // Complete code - bits already skipped in read_table
174 return Ok(value);
175 } else {
176 // Partial code - bits already skipped in read_table, continue from partial_n
177 return read_omega_from_state::<LE, _>(self, value);
178 }
179 }
180 default_read_omega(self)
181 }
182}
183
184/// Trait for writing ω codes.
185///
186/// This is the trait you should usually pull in scope to write ω codes.
187pub trait OmegaWrite<E: Endianness>: BitWrite<E> {
188 fn write_omega(&mut self, value: u64) -> Result<usize, Self::Error>;
189}
190
191/// Parametric trait for writing ω codes.
192///
193/// This trait is is more general than [`OmegaWrite`], as it makes it possible
194/// to specify how to use tables using const parameters.
195///
196/// We provide an implementation of this trait for [`BitWrite`]. An implementation
197/// of [`OmegaWrite`] using default values is usually provided exploiting the
198/// [`crate::codes::params::WriteParams`] mechanism.
199pub trait OmegaWriteParam<E: Endianness>: BitWrite<E> {
200 fn write_omega_param<const USE_TABLES: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
201}
202
203impl<B: BitWrite<BE>> OmegaWriteParam<BE> for B {
204 #[inline(always)]
205 fn write_omega_param<const USE_TABLES: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
206 debug_assert!(n < u64::MAX);
207 if USE_TABLES {
208 if let Ok(Some(len)) = omega_tables::write_table_be(self, n) {
209 return Ok(len);
210 }
211 }
212 Ok(recursive_omega_write::<BE, _>(n + 1, self)? + self.write_bits(0, 1)?)
213 }
214}
215
216impl<B: BitWrite<LE>> OmegaWriteParam<LE> for B {
217 #[inline(always)]
218 fn write_omega_param<const USE_TABLES: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
219 debug_assert!(n < u64::MAX);
220 if USE_TABLES {
221 if let Ok(Some(len)) = omega_tables::write_table_le(self, n) {
222 return Ok(len);
223 }
224 }
225 Ok(recursive_omega_write::<LE, _>(n + 1, self)? + self.write_bits(0, 1)?)
226 }
227}
228
229#[inline(always)]
230fn recursive_omega_write<E: Endianness, B: BitWrite<E>>(
231 mut n: u64,
232 writer: &mut B,
233) -> Result<usize, B::Error> {
234 if n <= 1 {
235 return Ok(0);
236 }
237 let λ = n.ilog2();
238 if E::IS_LITTLE {
239 #[cfg(feature = "checks")]
240 {
241 // Clean up after the lowest λ bits in case checks are enabled
242 n &= u64::MAX >> (u64::BITS - λ);
243 }
244 // Little-endian case: rotate left the lower λ + 1 bits (the bit in
245 // position λ is a one) so that the lowest bit can be peeked to find the
246 // block.
247 n = (n << 1) | 1;
248 }
249 Ok(recursive_omega_write::<E, _>(λ as u64, writer)? + writer.write_bits(n, λ as usize + 1)?)
250}
251
252#[cfg(test)]
253mod test {
254 use crate::prelude::*;
255
256 #[test]
257 #[allow(clippy::unusual_byte_groupings)]
258 fn test_omega() {
259 for (value, expected_be, expected_le) in [
260 (0, 0, 0),
261 (1, 0b10_0 << (64 - 3), 0b0_01),
262 (2, 0b11_0 << (64 - 3), 0b0_11),
263 (3, 0b10_100_0 << (64 - 6), 0b0_001_01),
264 (4, 0b10_101_0 << (64 - 6), 0b0_011_01),
265 (5, 0b10_110_0 << (64 - 6), 0b0_101_01),
266 (6, 0b10_111_0 << (64 - 6), 0b0_111_01),
267 (7, 0b11_1000_0 << (64 - 7), 0b0_0001_11),
268 (15, 0b10_100_10000_0 << (64 - 11), 0b0_00001_001_01),
269 (99, 0b10_110_1100100_0 << (64 - 13), 0b0_1001001_101_01),
270 (
271 999,
272 0b11_1001_1111101000_0 << (64 - 17),
273 0b0_1111010001_0011_11,
274 ),
275 (
276 999_999,
277 0b10_100_10011_11110100001001000000_0 << (64 - 31),
278 0b0_11101000010010000001_00111_001_01,
279 ),
280 ] {
281 let mut data = [0_u64];
282 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
283 writer.write_omega(value).unwrap();
284 drop(writer);
285 assert_eq!(
286 data[0].to_be(),
287 expected_be,
288 "\nfor value: {}\ngot: {:064b}\nexp: {:064b}\n",
289 value,
290 data[0].to_be(),
291 expected_be,
292 );
293
294 let mut data = [0_u64];
295 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
296 writer.write_omega(value).unwrap();
297 drop(writer);
298 assert_eq!(
299 data[0].to_le(),
300 expected_le,
301 "\nfor value: {}\ngot: {:064b}\nexp: {:064b}\n",
302 value,
303 data[0].to_le(),
304 expected_le,
305 );
306 }
307 }
308}