dsi_bitstream/codes/pi.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico−Drovandi π codes
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter *k* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* code is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [0 . . 64).
24//!
25//! In the original paper the definition of the code is very convoluted, as the
26//! authors appear to have missed the connection with [Rice codes](super::rice).
27//! The codewords implemented by this module are equivalent to the ones in the
28//! paper, in the sense that corresponding codewords have the same length, but
29//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
30//! faster—hence the name “streamlined π codes”.
31//!
32//! # References
33//!
34//! Alberto Apostolico and Guido Drovandi. “[Graph Compression by
35//! BFS](https://doi.org/10.3390/a2031031)”, Algorithms, 2:1031-1044, 2009.
36
37use crate::traits::*;
38
39use super::{RiceRead, RiceWrite, len_rice};
40
41/// Return the length of the π code for `n` with parameter `k`.
42///
43/// ```rust
44/// use dsi_bitstream::codes::len_pi;
45///
46/// // k = 0
47/// assert_eq!(len_pi(0, 0), 1, "π_0(0)");
48/// assert_eq!(len_pi(1, 0), 3, "π_0(1)");
49/// assert_eq!(len_pi(2, 0), 3, "π_0(2)");
50/// assert_eq!(len_pi(3, 0), 5, "π_0(3)");
51/// assert_eq!(len_pi(4, 0), 5, "π_0(4)");
52/// assert_eq!(len_pi(5, 0), 5, "π_0(5)");
53/// assert_eq!(len_pi(6, 0), 5, "π_0(6)");
54/// assert_eq!(len_pi(7, 0), 7, "π_0(7)");
55///
56/// // k = 1
57/// assert_eq!(len_pi(0, 1), 2, "π_1(0)");
58/// assert_eq!(len_pi(1, 1), 3, "π_1(1)");
59/// assert_eq!(len_pi(2, 1), 3, "π_1(2)");
60/// assert_eq!(len_pi(3, 1), 5, "π_1(3)");
61/// assert_eq!(len_pi(4, 1), 5, "π_1(4)");
62/// assert_eq!(len_pi(5, 1), 5, "π_1(5)");
63/// assert_eq!(len_pi(6, 1), 5, "π_1(6)");
64/// assert_eq!(len_pi(7, 1), 6, "π_1(7)");
65///
66/// // k = 2
67/// assert_eq!(len_pi(0, 2), 3, "π_2(0)");
68/// assert_eq!(len_pi(1, 2), 4, "π_2(1)");
69/// assert_eq!(len_pi(2, 2), 4, "π_2(2)");
70/// assert_eq!(len_pi(3, 2), 5, "π_2(3)");
71/// assert_eq!(len_pi(4, 2), 5, "π_2(4)");
72/// assert_eq!(len_pi(5, 2), 5, "π_2(5)");
73/// assert_eq!(len_pi(6, 2), 5, "π_2(6)");
74/// assert_eq!(len_pi(7, 2), 6, "π_2(7)");
75///
76/// // k = 3
77/// assert_eq!(len_pi(0, 3), 4, "π_3(0)");
78/// assert_eq!(len_pi(1, 3), 5, "π_3(1)");
79/// assert_eq!(len_pi(2, 3), 5, "π_3(2)");
80/// assert_eq!(len_pi(3, 3), 6, "π_3(3)");
81/// assert_eq!(len_pi(4, 3), 6, "π_3(4)");
82/// assert_eq!(len_pi(5, 3), 6, "π_3(5)");
83/// assert_eq!(len_pi(6, 3), 6, "π_3(6)");
84/// assert_eq!(len_pi(7, 3), 7, "π_3(7)");
85/// ```
86#[must_use]
87#[inline(always)]
88pub fn len_pi(mut n: u64, k: usize) -> usize {
89 debug_assert!(k < 64);
90 debug_assert!(n < u64::MAX);
91 n += 1;
92 let λ = n.ilog2() as usize;
93 len_rice(λ as u64, k) + λ
94}
95
96/// Trait for reading π codes.
97///
98/// This is the trait you should pull in scope to read π codes.
99pub trait PiRead<E: Endianness>: BitRead<E> + RiceRead<E> {
100 #[inline(always)]
101 fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error> {
102 debug_assert!(k < 64);
103 let λ = self.read_rice(k)?;
104 debug_assert!(λ < 64);
105 Ok((1 << λ) + self.read_bits(λ as usize)? - 1)
106 }
107}
108
109/// Trait for writing π codes.
110///
111/// This is the trait you should pull in scope to write π codes.
112pub trait PiWrite<E: Endianness>: BitWrite<E> + RiceWrite<E> {
113 #[inline(always)]
114 fn write_pi(&mut self, mut n: u64, k: usize) -> Result<usize, Self::Error> {
115 debug_assert!(k < 64);
116 debug_assert!(n < u64::MAX);
117
118 n += 1;
119 let λ = n.ilog2() as usize;
120
121 #[cfg(feature = "checks")]
122 {
123 // Clean up n in case checks are enabled
124 n ^= 1 << λ;
125 }
126
127 Ok(self.write_rice(λ as u64, k)? + self.write_bits(n, λ)?)
128 }
129}
130
131impl<E: Endianness, B: BitRead<E> + RiceRead<E> + ?Sized> PiRead<E> for B {}
132impl<E: Endianness, B: BitWrite<E> + RiceWrite<E> + ?Sized> PiWrite<E> for B {}
133
134#[cfg(test)]
135mod test {
136 use crate::prelude::*;
137
138 #[test]
139 fn test_roundtrip() {
140 let k = 3;
141 for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
142 let mut data = [0_u64; 10];
143 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
144 let code_len = writer.write_pi(value, k).unwrap();
145 assert_eq!(code_len, len_pi(value, k));
146 drop(writer);
147 let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
148 assert_eq!(
149 reader.read_pi(k).unwrap(),
150 value,
151 "for value: {} with k {}",
152 value,
153 k
154 );
155 }
156 }
157
158 #[test]
159 #[allow(clippy::unusual_byte_groupings)]
160 fn test_bits() {
161 for (k, value, expected) in [
162 (2, 20, 0b01_00_0101 << (64 - 8)),
163 (2, 0, 0b100 << (64 - 3)),
164 (2, 1, 0b1010 << (64 - 4)),
165 (2, 2, 0b1011 << (64 - 4)),
166 (2, 3, 0b1_1000 << (64 - 5)),
167 (2, 4, 0b1_1001 << (64 - 5)),
168 (2, 5, 0b1_1010 << (64 - 5)),
169 (2, 6, 0b1_1011 << (64 - 5)),
170 (2, 7, 0b11_1000 << (64 - 6)),
171 (3, 0, 0b1000 << (64 - 4)),
172 (3, 1, 0b1_0010 << (64 - 5)),
173 (3, 2, 0b1_0011 << (64 - 5)),
174 (3, 3, 0b1_01000 << (64 - 6)),
175 (3, 4, 0b1_01001 << (64 - 6)),
176 (3, 5, 0b1_01010 << (64 - 6)),
177 (3, 6, 0b1_01011 << (64 - 6)),
178 (3, 7, 0b101_1000 << (64 - 7)),
179 ] {
180 let mut data = [0_u64; 10];
181 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
182 let code_len = writer.write_pi(value, k).unwrap();
183 assert_eq!(code_len, len_pi(value, k));
184 drop(writer);
185 assert_eq!(
186 data[0].to_be(),
187 expected,
188 "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
189 value,
190 k,
191 data[0].to_be(),
192 expected,
193 code_len,
194 len_pi(value, k),
195 );
196 }
197 }
198
199 #[test]
200 fn test_against_zeta() {
201 // BE: π₀ = ζ₁ and π₁ = ζ₂
202 for k in 0..2 {
203 for value in 0..100 {
204 let mut data_pi = [0_u64; 10];
205 let mut data_zeta = [0_u64; 10];
206
207 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
208 let code_len = writer.write_pi(value, k).unwrap();
209 assert_eq!(code_len, len_pi(value, k));
210 drop(writer);
211
212 let mut writer =
213 <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
214 let code_len = writer.write_zeta(value, 1 << k).unwrap();
215 assert_eq!(code_len, len_zeta(value, 1 << k));
216 drop(writer);
217
218 assert_eq!(data_pi[0], data_zeta[0]);
219 }
220 }
221
222 // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
223 for value in 0..100 {
224 let mut data_pi = [0_u64; 10];
225 let mut data_zeta = [0_u64; 10];
226
227 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
228 let code_len = writer.write_pi(value, 0).unwrap();
229 assert_eq!(code_len, len_pi(value, 0));
230 drop(writer);
231
232 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
233 let code_len = writer.write_zeta(value, 1).unwrap();
234 assert_eq!(code_len, len_zeta(value, 1));
235 drop(writer);
236
237 assert_eq!(data_pi[0], data_zeta[0]);
238 }
239
240 for value in 0..100 {
241 assert_eq!(len_pi(value, 1), len_zeta(value, 2));
242 }
243 }
244}