dsi_bitstream/codes/
pi.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico−Drovandi π codes
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter *k* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* code is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [0 . . 64).
24//!
25//! In the original paper the definition of the code is very convoluted, as the
26//! authors appear to have missed the connection with [Rice codes](super::rice).
27//! The codewords implemented by this module are equivalent to the ones in the
28//! paper, in the sense that corresponding codewords have the same length, but
29//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
30//! faster—hence the name “streamlined π codes”.
31//!
32//! # References
33//!
34//! Alberto Apostolico and Guido Drovandi. “[Graph Compression by
35//! BFS](https://doi.org/10.3390/a2031031)”, Algorithms, 2:1031-1044, 2009.
36
37use crate::traits::*;
38
39use super::{RiceRead, RiceWrite, len_rice};
40
41/// Return the length of the π code for `n` with parameter `k`.
42///
43/// ```rust
44/// use dsi_bitstream::codes::len_pi;
45///
46/// // k = 0
47/// assert_eq!(len_pi(0, 0), 1, "π_0(0)");
48/// assert_eq!(len_pi(1, 0), 3, "π_0(1)");
49/// assert_eq!(len_pi(2, 0), 3, "π_0(2)");
50/// assert_eq!(len_pi(3, 0), 5, "π_0(3)");
51/// assert_eq!(len_pi(4, 0), 5, "π_0(4)");
52/// assert_eq!(len_pi(5, 0), 5, "π_0(5)");
53/// assert_eq!(len_pi(6, 0), 5, "π_0(6)");
54/// assert_eq!(len_pi(7, 0), 7, "π_0(7)");
55///
56/// // k = 1
57/// assert_eq!(len_pi(0, 1), 2, "π_1(0)");
58/// assert_eq!(len_pi(1, 1), 3, "π_1(1)");
59/// assert_eq!(len_pi(2, 1), 3, "π_1(2)");
60/// assert_eq!(len_pi(3, 1), 5, "π_1(3)");
61/// assert_eq!(len_pi(4, 1), 5, "π_1(4)");
62/// assert_eq!(len_pi(5, 1), 5, "π_1(5)");
63/// assert_eq!(len_pi(6, 1), 5, "π_1(6)");
64/// assert_eq!(len_pi(7, 1), 6, "π_1(7)");
65///
66/// // k = 2
67/// assert_eq!(len_pi(0, 2), 3, "π_2(0)");
68/// assert_eq!(len_pi(1, 2), 4, "π_2(1)");
69/// assert_eq!(len_pi(2, 2), 4, "π_2(2)");
70/// assert_eq!(len_pi(3, 2), 5, "π_2(3)");
71/// assert_eq!(len_pi(4, 2), 5, "π_2(4)");
72/// assert_eq!(len_pi(5, 2), 5, "π_2(5)");
73/// assert_eq!(len_pi(6, 2), 5, "π_2(6)");
74/// assert_eq!(len_pi(7, 2), 6, "π_2(7)");
75///
76/// // k = 3
77/// assert_eq!(len_pi(0, 3), 4, "π_3(0)");
78/// assert_eq!(len_pi(1, 3), 5, "π_3(1)");
79/// assert_eq!(len_pi(2, 3), 5, "π_3(2)");
80/// assert_eq!(len_pi(3, 3), 6, "π_3(3)");
81/// assert_eq!(len_pi(4, 3), 6, "π_3(4)");
82/// assert_eq!(len_pi(5, 3), 6, "π_3(5)");
83/// assert_eq!(len_pi(6, 3), 6, "π_3(6)");
84/// assert_eq!(len_pi(7, 3), 7, "π_3(7)");
85/// ```
86#[must_use]
87#[inline(always)]
88pub fn len_pi(mut n: u64, k: usize) -> usize {
89    debug_assert!(k < 64);
90    debug_assert!(n < u64::MAX);
91    n += 1;
92    let λ = n.ilog2() as usize;
93    len_rice(λ as u64, k) + λ
94}
95
96/// Trait for reading π codes.
97///
98/// This is the trait you should pull in scope to read π codes.
99pub trait PiRead<E: Endianness>: BitRead<E> + RiceRead<E> {
100    #[inline(always)]
101    fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error> {
102        debug_assert!(k < 64);
103        let λ = self.read_rice(k)?;
104        debug_assert!(λ < 64);
105        Ok((1 << λ) + self.read_bits(λ as usize)? - 1)
106    }
107}
108
109/// Trait for writing π codes.
110///
111/// This is the trait you should pull in scope to write π codes.
112pub trait PiWrite<E: Endianness>: BitWrite<E> + RiceWrite<E> {
113    #[inline(always)]
114    fn write_pi(&mut self, mut n: u64, k: usize) -> Result<usize, Self::Error> {
115        debug_assert!(k < 64);
116        debug_assert!(n < u64::MAX);
117
118        n += 1;
119        let λ = n.ilog2() as usize;
120
121        #[cfg(feature = "checks")]
122        {
123            // Clean up n in case checks are enabled
124            n ^= 1 << λ;
125        }
126
127        Ok(self.write_rice(λ as u64, k)? + self.write_bits(n, λ)?)
128    }
129}
130
131impl<E: Endianness, B: BitRead<E> + RiceRead<E> + ?Sized> PiRead<E> for B {}
132impl<E: Endianness, B: BitWrite<E> + RiceWrite<E> + ?Sized> PiWrite<E> for B {}
133
134#[cfg(test)]
135mod test {
136    use crate::prelude::*;
137
138    #[test]
139    fn test_roundtrip() {
140        let k = 3;
141        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
142            let mut data = [0_u64; 10];
143            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
144            let code_len = writer.write_pi(value, k).unwrap();
145            assert_eq!(code_len, len_pi(value, k));
146            drop(writer);
147            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
148            assert_eq!(
149                reader.read_pi(k).unwrap(),
150                value,
151                "for value: {} with k {}",
152                value,
153                k
154            );
155        }
156    }
157
158    #[test]
159    #[allow(clippy::unusual_byte_groupings)]
160    fn test_bits() {
161        for (k, value, expected) in [
162            (2, 20, 0b01_00_0101 << (64 - 8)),
163            (2, 0, 0b100 << (64 - 3)),
164            (2, 1, 0b1010 << (64 - 4)),
165            (2, 2, 0b1011 << (64 - 4)),
166            (2, 3, 0b1_1000 << (64 - 5)),
167            (2, 4, 0b1_1001 << (64 - 5)),
168            (2, 5, 0b1_1010 << (64 - 5)),
169            (2, 6, 0b1_1011 << (64 - 5)),
170            (2, 7, 0b11_1000 << (64 - 6)),
171            (3, 0, 0b1000 << (64 - 4)),
172            (3, 1, 0b1_0010 << (64 - 5)),
173            (3, 2, 0b1_0011 << (64 - 5)),
174            (3, 3, 0b1_01000 << (64 - 6)),
175            (3, 4, 0b1_01001 << (64 - 6)),
176            (3, 5, 0b1_01010 << (64 - 6)),
177            (3, 6, 0b1_01011 << (64 - 6)),
178            (3, 7, 0b101_1000 << (64 - 7)),
179        ] {
180            let mut data = [0_u64; 10];
181            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
182            let code_len = writer.write_pi(value, k).unwrap();
183            assert_eq!(code_len, len_pi(value, k));
184            drop(writer);
185            assert_eq!(
186                data[0].to_be(),
187                expected,
188                "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
189                value,
190                k,
191                data[0].to_be(),
192                expected,
193                code_len,
194                len_pi(value, k),
195            );
196        }
197    }
198
199    #[test]
200    fn test_against_zeta() {
201        // BE: π₀ = ζ₁ and π₁ = ζ₂
202        for k in 0..2 {
203            for value in 0..100 {
204                let mut data_pi = [0_u64; 10];
205                let mut data_zeta = [0_u64; 10];
206
207                let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
208                let code_len = writer.write_pi(value, k).unwrap();
209                assert_eq!(code_len, len_pi(value, k));
210                drop(writer);
211
212                let mut writer =
213                    <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
214                let code_len = writer.write_zeta(value, 1 << k).unwrap();
215                assert_eq!(code_len, len_zeta(value, 1 << k));
216                drop(writer);
217
218                assert_eq!(data_pi[0], data_zeta[0]);
219            }
220        }
221
222        // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
223        for value in 0..100 {
224            let mut data_pi = [0_u64; 10];
225            let mut data_zeta = [0_u64; 10];
226
227            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
228            let code_len = writer.write_pi(value, 0).unwrap();
229            assert_eq!(code_len, len_pi(value, 0));
230            drop(writer);
231
232            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
233            let code_len = writer.write_zeta(value, 1).unwrap();
234            assert_eq!(code_len, len_zeta(value, 1));
235            drop(writer);
236
237            assert_eq!(data_pi[0], data_zeta[0]);
238        }
239
240        for value in 0..100 {
241            assert_eq!(len_pi(value, 1), len_zeta(value, 2));
242        }
243    }
244}