dsi_bitstream/codes/omega.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Elias ω code.
9//!
10//! Elias [γ](super::gamma) and [δ](super::delta) codes encode a number *n* by
11//! storing the binary representation of *n* + 1, with the most significant bit
12//! removed, prefixed by its length in unary or [γ](super::gamma) code,
13//! respectively. Thus, [δ](super::delta) can be seen as adding one level of
14//! recursion in the length representation with respect to [γ](super::gamma).
15//! The ω code encodes the length of the binary representation of *n* + 1
16//! recursively.
17//!
18//! The implied distribution for the ω code is difficult to write analytically,
19//! but essentially it is as close as possible to ≈ 1/*x* (as there is no code
20//! for that distribution).
21//!
22//! The ω code is easier to describe the format of a code, rather than the
23//! encoding algorithm.
24//!
25//! A codeword is given by the concatenation of blocks *b*₀ *b*₁ … *b*ₙ `0`,
26//! where each block *b*ᵢ is a binary string starting with `1` and *b*₀ = `10`
27//! or `11`. One can interpret the highest bit of each block as a continuation
28//! bit, and the last `0` as a terminator of the code.
29//!
30//! The condition for a valid codeword is that the value represented by each
31//! block, incremented by one, is the length of the following block, except for
32//! the last block.
33//!
34//! The value associated with a codeword is 0 if the code is `0`, and otherwise
35//! the value of the last block, decremented by one.
36//!
37//! For example, `1110010`, which is formed by the blocks `11`, `1011`, and `0`,
38//! represents 10.
39//!
40//! As discussed in the [codes module documentation](crate::codes), to make the
41//! code readable in the little-endian case, rather than reversing the bits of
42//! the blocks, which would be expensive, we simply rotate by one on the left
43//! each block, with the result that the most significant bit of the block is
44//! now the first bit in the stream, making it possible to check for the
45//! presence of a continuation bit. For example, in the little-endian case, the
46//! code for 10 is `0011111`, which is formed by the blocks `11`, `0111`, and
47//! `0`.
48//!
49//! # References
50//!
51//! Peter Elias. “[Universal codeword sets and representations of the
52//! integers](https://doi.org/10.1109/TIT.1975.1055349)”. IEEE Transactions on
53//! Information Theory, 21(2):194−203, March 1975.
54
55use crate::traits::*;
56use common_traits::CastableInto;
57
58/// Returns the length of the ω code for `n`.
59#[inline(always)]
60pub fn len_omega(n: u64) -> usize {
61 recursive_len(n + 1)
62}
63
64fn recursive_len(n: u64) -> usize {
65 if n <= 1 {
66 return 1;
67 }
68 let λ = n.ilog2() as u64;
69 recursive_len(λ) + λ as usize + 1
70}
71
72/// Trait for reading ω codes.
73///
74/// This is the trait you should pull in scope to read ω codes.
75pub trait OmegaRead<E: Endianness>: BitRead<E> {
76 #[inline(always)]
77 fn read_omega(&mut self) -> Result<u64, Self::Error> {
78 let mut n = 1;
79 loop {
80 let bit = self.peek_bits(1)?.cast();
81 if bit == 0 {
82 self.skip_bits_after_peek(1);
83 return Ok(n - 1);
84 }
85
86 let λ = n;
87 n = self.read_bits(λ as usize + 1)?;
88
89 if core::any::TypeId::of::<E>() == core::any::TypeId::of::<LE>() {
90 // Little-endian case: rotate right the lower λ + 1 bits (the
91 // lowest bit is a one) to reverse the rotation performed when
92 // writing
93 n = (n >> 1) | (1 << λ);
94 }
95 }
96 }
97}
98
99/// Trait for writing ω codes.
100///
101/// This is the trait you should pull in scope to write ω codes.
102pub trait OmegaWrite<E: Endianness>: BitWrite<E> {
103 #[inline(always)]
104 fn write_omega(&mut self, n: u64) -> Result<usize, Self::Error> {
105 // omega codes are indexed from 1
106 Ok(recursive_write::<E, Self>(n + 1, self)? + self.write_bits(0, 1)?)
107 }
108}
109
110fn recursive_write<E: Endianness, B: BitWrite<E> + ?Sized>(
111 mut n: u64,
112 writer: &mut B,
113) -> Result<usize, B::Error> {
114 if n <= 1 {
115 return Ok(0);
116 }
117 let λ = n.ilog2();
118
119 if core::any::TypeId::of::<E>() == core::any::TypeId::of::<LE>() {
120 // Little-endian case: rotate left the lower λ + 1 bits (the bit in
121 // position λ is a one) so that the lowest bit can be peeked to find the
122 // block.
123 n = (n << 1) | 1;
124 #[cfg(feature = "checks")]
125 {
126 // Clean up n in case checks are enabled
127 n &= u64::MAX >> (u64::BITS - 1 - λ);
128 }
129 }
130 Ok(recursive_write(λ as u64, writer)? + writer.write_bits(n, λ as usize + 1)?)
131}
132
133impl<E: Endianness, B: BitRead<E>> OmegaRead<E> for B {}
134impl<E: Endianness, B: BitWrite<E>> OmegaWrite<E> for B {}
135
136#[cfg(test)]
137mod test {
138 use crate::prelude::*;
139
140 #[test]
141 fn test_roundtrip() {
142 for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
143 let mut data = vec![0_u64];
144 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data));
145 let code_len = writer.write_omega(value).unwrap();
146 assert_eq!(code_len, len_omega(value));
147 drop(writer);
148 let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
149 assert_eq!(reader.read_omega().unwrap(), value);
150
151 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut data));
152 let code_len = writer.write_omega(value).unwrap();
153 assert_eq!(code_len, len_omega(value));
154 drop(writer);
155 let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new(&data));
156 assert_eq!(reader.read_omega().unwrap(), value,);
157 }
158 }
159
160 #[test]
161 #[allow(clippy::unusual_byte_groupings)]
162 fn test_omega() {
163 for (value, expected_be, expected_le) in [
164 (0, 0, 0),
165 (1, 0b10_0 << (64 - 3), 0b0_01),
166 (2, 0b11_0 << (64 - 3), 0b0_11),
167 (3, 0b10_100_0 << (64 - 6), 0b0_001_01),
168 (4, 0b10_101_0 << (64 - 6), 0b0_011_01),
169 (5, 0b10_110_0 << (64 - 6), 0b0_101_01),
170 (6, 0b10_111_0 << (64 - 6), 0b0_111_01),
171 (7, 0b11_1000_0 << (64 - 7), 0b0_0001_11),
172 (15, 0b10_100_10000_0 << (64 - 11), 0b0_00001_001_01),
173 (99, 0b10_110_1100100_0 << (64 - 13), 0b0_1001001_101_01),
174 (
175 999,
176 0b11_1001_1111101000_0 << (64 - 17),
177 0b0_1111010001_0011_11,
178 ),
179 (
180 999_999,
181 0b10_100_10011_11110100001001000000_0 << (64 - 31),
182 0b0_11101000010010000001_00111_001_01,
183 ),
184 ] {
185 let mut data = vec![0_u64];
186 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data));
187 writer.write_omega(value).unwrap();
188 drop(writer);
189 assert_eq!(
190 data[0].to_be(),
191 expected_be,
192 "\nfor value: {}\ngot: {:064b}\nexp: {:064b}\n",
193 value,
194 data[0].to_be(),
195 expected_be,
196 );
197
198 let mut data = vec![0_u64];
199 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut data));
200 writer.write_omega(value).unwrap();
201 drop(writer);
202 assert_eq!(
203 data[0].to_le(),
204 expected_le,
205 "\nfor value: {}\ngot: {:064b}\nexp: {:064b}\n",
206 value,
207 data[0].to_le(),
208 expected_le,
209 );
210 }
211 }
212}