dsi_bitstream/codes/
pi.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico−Drovandi π codes
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter *k* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* code is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! In the original paper the definition of the code is very convoluted, as the
24//! authors appear to have missed the connection with [Rice codes](super::rice).
25//! The codewords implemented by this module are equivalent to the ones in the
26//! paper, in the sense that corresponding codewords have the same length, but
27//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
28//! faster—hence the name “streamlined π codes”.
29//!
30//! # References
31//!
32//! Alberto Apostolico and Guido Drovandi. “[Graph Compression by
33//! BFS](https://doi.org/10.3390/a2031031)”, Algorithms, 2:1031-1044, 2009.
34
35use crate::traits::*;
36
37use super::{len_rice, RiceRead, RiceWrite};
38
39/// Return the length of the π code for `n` with parameter `k`.
40///
41/// ```rust
42/// use dsi_bitstream::codes::len_pi;
43///
44/// // k = 0
45/// assert_eq!(len_pi(0, 0), 1, "π_0(0)");
46/// assert_eq!(len_pi(1, 0), 3, "π_0(1)");
47/// assert_eq!(len_pi(2, 0), 3, "π_0(2)");
48/// assert_eq!(len_pi(3, 0), 5, "π_0(3)");
49/// assert_eq!(len_pi(4, 0), 5, "π_0(4)");
50/// assert_eq!(len_pi(5, 0), 5, "π_0(5)");
51/// assert_eq!(len_pi(6, 0), 5, "π_0(6)");
52/// assert_eq!(len_pi(7, 0), 7, "π_0(7)");
53///
54/// // k = 1
55/// assert_eq!(len_pi(0, 1), 2, "π_1(0)");
56/// assert_eq!(len_pi(1, 1), 3, "π_1(1)");
57/// assert_eq!(len_pi(2, 1), 3, "π_1(2)");
58/// assert_eq!(len_pi(3, 1), 5, "π_1(3)");
59/// assert_eq!(len_pi(4, 1), 5, "π_1(4)");
60/// assert_eq!(len_pi(5, 1), 5, "π_1(5)");
61/// assert_eq!(len_pi(6, 1), 5, "π_1(6)");
62/// assert_eq!(len_pi(7, 1), 6, "π_1(7)");
63///
64/// // k = 2
65/// assert_eq!(len_pi(0, 2), 3, "π_2(0)");
66/// assert_eq!(len_pi(1, 2), 4, "π_2(1)");
67/// assert_eq!(len_pi(2, 2), 4, "π_2(2)");
68/// assert_eq!(len_pi(3, 2), 5, "π_2(3)");
69/// assert_eq!(len_pi(4, 2), 5, "π_2(4)");
70/// assert_eq!(len_pi(5, 2), 5, "π_2(5)");
71/// assert_eq!(len_pi(6, 2), 5, "π_2(6)");
72/// assert_eq!(len_pi(7, 2), 6, "π_2(7)");
73///
74/// // k = 3
75/// assert_eq!(len_pi(0, 3), 4, "π_3(0)");
76/// assert_eq!(len_pi(1, 3), 5, "π_3(1)");
77/// assert_eq!(len_pi(2, 3), 5, "π_3(2)");
78/// assert_eq!(len_pi(3, 3), 6, "π_3(3)");
79/// assert_eq!(len_pi(4, 3), 6, "π_3(4)");
80/// assert_eq!(len_pi(5, 3), 6, "π_3(5)");
81/// assert_eq!(len_pi(6, 3), 6, "π_3(6)");
82/// assert_eq!(len_pi(7, 3), 7, "π_3(7)");
83/// ```
84#[must_use]
85#[inline(always)]
86pub fn len_pi(mut n: u64, k: usize) -> usize {
87    n += 1;
88    let λ = n.ilog2() as usize;
89    len_rice(λ as u64, k) + λ
90}
91
92/// Trait for reading π codes.
93///
94/// This is the trait you should pull in scope to read π codes.
95pub trait PiRead<E: Endianness>: BitRead<E> + RiceRead<E> {
96    #[inline(always)]
97    fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error> {
98        let λ = self.read_rice(k)?;
99        Ok((1 << λ) + self.read_bits(λ as usize)? - 1)
100    }
101}
102
103/// Trait for writing π codes.
104///
105/// This is the trait you should pull in scope to write π codes.
106pub trait PiWrite<E: Endianness>: BitWrite<E> + RiceWrite<E> {
107    #[inline(always)]
108    fn write_pi(&mut self, mut n: u64, k: usize) -> Result<usize, Self::Error> {
109        n += 1;
110        let λ = n.ilog2() as usize;
111
112        #[cfg(feature = "checks")]
113        {
114            // Clean up n in case checks are enabled
115            n ^= 1 << λ;
116        }
117
118        Ok(self.write_rice(λ as u64, k)? + self.write_bits(n, λ)?)
119    }
120}
121
122impl<E: Endianness, B: BitRead<E> + RiceRead<E> + ?Sized> PiRead<E> for B {}
123impl<E: Endianness, B: BitWrite<E> + RiceWrite<E> + ?Sized> PiWrite<E> for B {}
124
125#[cfg(test)]
126mod test {
127    use crate::prelude::*;
128
129    #[test]
130    fn test_roundtrip() {
131        let k = 3;
132        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
133            let mut data = vec![0_u64];
134            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data));
135            let code_len = writer.write_pi(value, k).unwrap();
136            assert_eq!(code_len, len_pi(value, k));
137            drop(writer);
138            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
139            assert_eq!(
140                reader.read_pi(k).unwrap(),
141                value,
142                "for value: {} with k {}",
143                value,
144                k
145            );
146        }
147    }
148
149    #[test]
150    #[allow(clippy::unusual_byte_groupings)]
151    fn test_bits() {
152        for (k, value, expected) in [
153            (2, 20, 0b01_00_0101 << (64 - 8)),
154            (2, 0, 0b100 << (64 - 3)),
155            (2, 1, 0b1010 << (64 - 4)),
156            (2, 2, 0b1011 << (64 - 4)),
157            (2, 3, 0b1_1000 << (64 - 5)),
158            (2, 4, 0b1_1001 << (64 - 5)),
159            (2, 5, 0b1_1010 << (64 - 5)),
160            (2, 6, 0b1_1011 << (64 - 5)),
161            (2, 7, 0b11_1000 << (64 - 6)),
162            (3, 0, 0b1000 << (64 - 4)),
163            (3, 1, 0b1_0010 << (64 - 5)),
164            (3, 2, 0b1_0011 << (64 - 5)),
165            (3, 3, 0b1_01000 << (64 - 6)),
166            (3, 4, 0b1_01001 << (64 - 6)),
167            (3, 5, 0b1_01010 << (64 - 6)),
168            (3, 6, 0b1_01011 << (64 - 6)),
169            (3, 7, 0b101_1000 << (64 - 7)),
170        ] {
171            let mut data = vec![0_u64];
172            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data));
173            let code_len = writer.write_pi(value, k).unwrap();
174            assert_eq!(code_len, len_pi(value, k));
175            drop(writer);
176            assert_eq!(
177                data[0].to_be(),
178                expected,
179                "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
180                value,
181                k,
182                data[0].to_be(),
183                expected,
184                code_len,
185                len_pi(value, k),
186            );
187        }
188    }
189
190    #[test]
191    fn test_against_zeta() {
192        // BE: π₀ = ζ₁ and π₁ = ζ₂
193        for k in 0..2 {
194            for value in 0..100 {
195                let mut data_pi = vec![0_u64];
196                let mut data_zeta = vec![0_u64];
197
198                let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data_pi));
199                let code_len = writer.write_pi(value, k).unwrap();
200                assert_eq!(code_len, len_pi(value, k));
201                drop(writer);
202
203                let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data_zeta));
204                let code_len = writer.write_zeta(value, 1 << k).unwrap();
205                assert_eq!(code_len, len_zeta(value, 1 << k));
206                drop(writer);
207
208                assert_eq!(data_pi[0], data_zeta[0]);
209            }
210        }
211
212        // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
213        for value in 0..100 {
214            let mut data_pi = vec![0_u64];
215            let mut data_zeta = vec![0_u64];
216
217            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut data_pi));
218            let code_len = writer.write_pi(value, 0).unwrap();
219            assert_eq!(code_len, len_pi(value, 0));
220            drop(writer);
221
222            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut data_zeta));
223            let code_len = writer.write_zeta(value, 1).unwrap();
224            assert_eq!(code_len, len_zeta(value, 1));
225            drop(writer);
226
227            assert_eq!(data_pi[0], data_zeta[0]);
228        }
229
230        for value in 0..100 {
231            assert_eq!(len_pi(value, 1), len_zeta(value, 2));
232        }
233    }
234}