Skip to main content

dsi_bitstream/codes/
pi.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico−Drovandi π codes
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter 2*ᵏ* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* code is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! This module provides a generic implementation of π codes, and a specialized
24//! implementation for π₂ that may use tables.
25//!
26//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [0 . . 64).
27//!
28//! In the original paper the definition of the code is very convoluted, as the
29//! authors appear to have missed the connection with [Rice codes](super::rice).
30//! The codewords implemented by this module are equivalent to the ones in the
31//! paper, in the sense that corresponding codewords have the same length, but
32//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
33//! faster—hence the name “streamlined π codes”.
34//!
35//! # References
36//!
37//! Alberto Apostolico and Guido Drovandi. “[Graph Compression by
38//! BFS](https://doi.org/10.3390/a2031031)”, Algorithms, 2:1031-1044, 2009.
39
40use crate::traits::*;
41
42use super::{RiceRead, RiceWrite, len_rice, pi_tables};
43
44/// Returns the length of the π code with parameter `k` for `n`.
45#[must_use]
46#[inline(always)]
47#[allow(clippy::collapsible_if)]
48pub fn len_pi_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
49    debug_assert!(k < 64);
50    if USE_TABLE {
51        if k == pi_tables::K {
52            if let Some(idx) = pi_tables::LEN.get(n as usize) {
53                return *idx as usize;
54            }
55        }
56    }
57    debug_assert!(n < u64::MAX);
58    n += 1;
59    let λ = n.ilog2() as usize;
60    len_rice(λ as u64, k) + λ
61}
62
63/// Returns the length of the π code for `n` with parameter `k` using
64/// a default value for `USE_TABLE`.
65///
66/// ```rust
67/// use dsi_bitstream::codes::len_pi;
68///
69/// // k = 0
70/// assert_eq!(len_pi(0, 0), 1, "π_0(0)");
71/// assert_eq!(len_pi(1, 0), 3, "π_0(1)");
72/// assert_eq!(len_pi(2, 0), 3, "π_0(2)");
73/// assert_eq!(len_pi(3, 0), 5, "π_0(3)");
74/// assert_eq!(len_pi(4, 0), 5, "π_0(4)");
75/// assert_eq!(len_pi(5, 0), 5, "π_0(5)");
76/// assert_eq!(len_pi(6, 0), 5, "π_0(6)");
77/// assert_eq!(len_pi(7, 0), 7, "π_0(7)");
78///
79/// // k = 1
80/// assert_eq!(len_pi(0, 1), 2, "π_1(0)");
81/// assert_eq!(len_pi(1, 1), 3, "π_1(1)");
82/// assert_eq!(len_pi(2, 1), 3, "π_1(2)");
83/// assert_eq!(len_pi(3, 1), 5, "π_1(3)");
84/// assert_eq!(len_pi(4, 1), 5, "π_1(4)");
85/// assert_eq!(len_pi(5, 1), 5, "π_1(5)");
86/// assert_eq!(len_pi(6, 1), 5, "π_1(6)");
87/// assert_eq!(len_pi(7, 1), 6, "π_1(7)");
88///
89/// // k = 2
90/// assert_eq!(len_pi(0, 2), 3, "π_2(0)");
91/// assert_eq!(len_pi(1, 2), 4, "π_2(1)");
92/// assert_eq!(len_pi(2, 2), 4, "π_2(2)");
93/// assert_eq!(len_pi(3, 2), 5, "π_2(3)");
94/// assert_eq!(len_pi(4, 2), 5, "π_2(4)");
95/// assert_eq!(len_pi(5, 2), 5, "π_2(5)");
96/// assert_eq!(len_pi(6, 2), 5, "π_2(6)");
97/// assert_eq!(len_pi(7, 2), 6, "π_2(7)");
98///
99/// // k = 3
100/// assert_eq!(len_pi(0, 3), 4, "π_3(0)");
101/// assert_eq!(len_pi(1, 3), 5, "π_3(1)");
102/// assert_eq!(len_pi(2, 3), 5, "π_3(2)");
103/// assert_eq!(len_pi(3, 3), 6, "π_3(3)");
104/// assert_eq!(len_pi(4, 3), 6, "π_3(4)");
105/// assert_eq!(len_pi(5, 3), 6, "π_3(5)");
106/// assert_eq!(len_pi(6, 3), 6, "π_3(6)");
107/// assert_eq!(len_pi(7, 3), 7, "π_3(7)");
108/// ```
109#[must_use]
110#[inline(always)]
111pub fn len_pi(n: u64, k: usize) -> usize {
112    len_pi_param::<true>(n, k)
113}
114
115/// Trait for reading π codes.
116///
117/// This is the trait you should usually pull in scope to read π codes.
118pub trait PiRead<E: Endianness>: BitRead<E> {
119    fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error>;
120    fn read_pi2(&mut self) -> Result<u64, Self::Error>;
121}
122
123/// Parametric trait for reading π codes.
124///
125/// This trait is more general than [`PiRead`], as it makes it possible
126/// to specify how to use tables using const parameters.
127///
128/// We provide an implementation of this trait for [`BitRead`]. An implementation
129/// of [`PiRead`] using default values is usually provided exploiting the
130/// [`crate::codes::params::ReadParams`] mechanism.
131pub trait PiReadParam<E: Endianness>: BitRead<E> + RiceRead<E> {
132    fn read_pi_param(&mut self, k: usize) -> Result<u64, Self::Error>;
133    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
134}
135
136impl<B: BitRead<BE> + RiceRead<BE>> PiReadParam<BE> for B {
137    #[inline(always)]
138    fn read_pi_param(&mut self, k: usize) -> Result<u64, B::Error> {
139        default_read_pi(self, k)
140    }
141
142    #[inline(always)]
143    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
144        if USE_TABLE {
145            if let Some((res, _)) = pi_tables::read_table_be(self) {
146                return Ok(res);
147            }
148        }
149        default_read_pi(self, 2)
150    }
151}
152
153impl<B: BitRead<LE> + RiceRead<LE>> PiReadParam<LE> for B {
154    #[inline(always)]
155    fn read_pi_param(&mut self, k: usize) -> Result<u64, B::Error> {
156        default_read_pi(self, k)
157    }
158
159    #[inline(always)]
160    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
161        if USE_TABLE {
162            if let Some((res, _)) = pi_tables::read_table_le(self) {
163                return Ok(res);
164            }
165        }
166        default_read_pi(self, 2)
167    }
168}
169
170/// Default, internal non-table based implementation that works
171/// for any endianness.
172#[inline(always)]
173fn default_read_pi<E: Endianness, B: BitRead<E> + RiceRead<E>>(
174    backend: &mut B,
175    k: usize,
176) -> Result<u64, B::Error> {
177    debug_assert!(k < 64);
178    let λ = backend.read_rice(k)?;
179    debug_assert!(λ < 64);
180    Ok((1 << λ) + backend.read_bits(λ as usize)? - 1)
181}
182
183/// Trait for writing π codes.
184///
185/// This is the trait you should usually pull in scope to write π codes.
186pub trait PiWrite<E: Endianness>: BitWrite<E> {
187    fn write_pi(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
188    fn write_pi2(&mut self, n: u64) -> Result<usize, Self::Error>;
189}
190
191/// Parametric trait for writing π codes.
192///
193/// This trait is more general than [`PiWrite`], as it makes it possible
194/// to specify how to use tables using const parameters.
195///
196/// We provide an implementation of this trait for [`BitWrite`]. An implementation
197/// of [`PiWrite`] using default values is usually provided exploiting the
198/// [`crate::codes::params::WriteParams`] mechanism.
199pub trait PiWriteParam<E: Endianness>: BitWrite<E> + RiceWrite<E> {
200    fn write_pi_param<const USE_TABLE: bool>(
201        &mut self,
202        n: u64,
203        k: usize,
204    ) -> Result<usize, Self::Error>;
205    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
206}
207
208impl<B: BitWrite<BE> + RiceWrite<BE>> PiWriteParam<BE> for B {
209    #[inline(always)]
210    fn write_pi_param<const USE_TABLE: bool>(
211        &mut self,
212        n: u64,
213        k: usize,
214    ) -> Result<usize, Self::Error> {
215        default_write_pi(self, n, k)
216    }
217
218    #[inline(always)]
219    #[allow(clippy::collapsible_if)]
220    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
221        if USE_TABLE {
222            if let Some(len) = pi_tables::write_table_be(self, n)? {
223                return Ok(len);
224            }
225        }
226        default_write_pi(self, n, 2)
227    }
228}
229
230impl<B: BitWrite<LE> + RiceWrite<LE>> PiWriteParam<LE> for B {
231    #[inline(always)]
232    fn write_pi_param<const USE_TABLE: bool>(
233        &mut self,
234        n: u64,
235        k: usize,
236    ) -> Result<usize, Self::Error> {
237        default_write_pi(self, n, k)
238    }
239
240    #[inline(always)]
241    #[allow(clippy::collapsible_if)]
242    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
243        if USE_TABLE {
244            if let Some(len) = pi_tables::write_table_le(self, n)? {
245                return Ok(len);
246            }
247        }
248        default_write_pi(self, n, 2)
249    }
250}
251
252/// Default, internal non-table based implementation that works
253/// for any endianness.
254#[inline(always)]
255fn default_write_pi<E: Endianness, B: BitWrite<E> + RiceWrite<E>>(
256    backend: &mut B,
257    mut n: u64,
258    k: usize,
259) -> Result<usize, B::Error> {
260    debug_assert!(k < 64);
261    debug_assert!(n < u64::MAX);
262
263    n += 1;
264    let λ = n.ilog2() as usize;
265
266    #[cfg(feature = "checks")]
267    {
268        // Clean up n in case checks are enabled
269        n ^= 1 << λ;
270    }
271
272    Ok(backend.write_rice(λ as u64, k)? + backend.write_bits(n, λ)?)
273}
274
275#[cfg(test)]
276mod test {
277    use crate::prelude::*;
278
279    #[test]
280    fn test_roundtrip() {
281        let k = 3;
282        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
283            let mut data = [0_u64; 10];
284            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
285            let code_len = writer.write_pi(value, k).unwrap();
286            assert_eq!(code_len, len_pi(value, k));
287            drop(writer);
288            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
289            assert_eq!(
290                reader.read_pi(k).unwrap(),
291                value,
292                "for value: {} with k {}",
293                value,
294                k
295            );
296        }
297    }
298
299    #[test]
300    fn test_roundtrip_pi2() {
301        // Test the specialized pi2 methods
302        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
303            // Test BE
304            let mut data = [0_u64; 10];
305            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
306            let code_len = writer.write_pi2(value).unwrap();
307            assert_eq!(code_len, len_pi(value, 2));
308            drop(writer);
309            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
310            assert_eq!(reader.read_pi2().unwrap(), value, "BE for value: {}", value,);
311
312            // Test LE
313            let mut data = [0_u64; 10];
314            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
315            let code_len = writer.write_pi2(value).unwrap();
316            assert_eq!(code_len, len_pi(value, 2));
317            drop(writer);
318            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new(&data));
319            assert_eq!(reader.read_pi2().unwrap(), value, "LE for value: {}", value,);
320        }
321    }
322
323    #[test]
324    fn test_roundtrip_pi2_param() {
325        // Test the parametric pi2 methods with tables
326        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
327            // Test BE with tables
328            let mut data = [0_u64; 10];
329            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
330            let code_len = writer.write_pi2_param::<true>(value).unwrap();
331            assert_eq!(code_len, len_pi(value, 2));
332            drop(writer);
333            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
334            assert_eq!(
335                reader.read_pi2_param::<true>().unwrap(),
336                value,
337                "BE table for value: {}",
338                value,
339            );
340
341            // Test BE without tables
342            let mut data = [0_u64; 10];
343            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
344            let code_len = writer.write_pi2_param::<false>(value).unwrap();
345            assert_eq!(code_len, len_pi(value, 2));
346            drop(writer);
347            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
348            assert_eq!(
349                reader.read_pi2_param::<false>().unwrap(),
350                value,
351                "BE no table for value: {}",
352                value,
353            );
354
355            // Test LE with tables
356            let mut data = [0_u64; 10];
357            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
358            let code_len = writer.write_pi2_param::<true>(value).unwrap();
359            assert_eq!(code_len, len_pi(value, 2));
360            drop(writer);
361            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new(&data));
362            assert_eq!(
363                reader.read_pi2_param::<true>().unwrap(),
364                value,
365                "LE table for value: {}",
366                value,
367            );
368
369            // Test LE without tables
370            let mut data = [0_u64; 10];
371            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
372            let code_len = writer.write_pi2_param::<false>(value).unwrap();
373            assert_eq!(code_len, len_pi(value, 2));
374            drop(writer);
375            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new(&data));
376            assert_eq!(
377                reader.read_pi2_param::<false>().unwrap(),
378                value,
379                "LE no table for value: {}",
380                value,
381            );
382        }
383    }
384
385    #[test]
386    #[allow(clippy::unusual_byte_groupings)]
387    fn test_bits() {
388        for (k, value, expected) in [
389            (2, 20, 0b01_00_0101 << (64 - 8)),
390            (2, 0, 0b100 << (64 - 3)),
391            (2, 1, 0b1010 << (64 - 4)),
392            (2, 2, 0b1011 << (64 - 4)),
393            (2, 3, 0b1_1000 << (64 - 5)),
394            (2, 4, 0b1_1001 << (64 - 5)),
395            (2, 5, 0b1_1010 << (64 - 5)),
396            (2, 6, 0b1_1011 << (64 - 5)),
397            (2, 7, 0b11_1000 << (64 - 6)),
398            (3, 0, 0b1000 << (64 - 4)),
399            (3, 1, 0b1_0010 << (64 - 5)),
400            (3, 2, 0b1_0011 << (64 - 5)),
401            (3, 3, 0b1_01000 << (64 - 6)),
402            (3, 4, 0b1_01001 << (64 - 6)),
403            (3, 5, 0b1_01010 << (64 - 6)),
404            (3, 6, 0b1_01011 << (64 - 6)),
405            (3, 7, 0b101_1000 << (64 - 7)),
406        ] {
407            let mut data = [0_u64; 10];
408            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
409            let code_len = writer.write_pi(value, k).unwrap();
410            assert_eq!(code_len, len_pi(value, k));
411            drop(writer);
412            assert_eq!(
413                data[0].to_be(),
414                expected,
415                "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
416                value,
417                k,
418                data[0].to_be(),
419                expected,
420                code_len,
421                len_pi(value, k),
422            );
423        }
424    }
425
426    #[test]
427    fn test_against_zeta() {
428        // BE: π₀ = ζ₁ and π₁ = ζ₂
429        for k in 0..2 {
430            for value in 0..100 {
431                let mut data_pi = [0_u64; 10];
432                let mut data_zeta = [0_u64; 10];
433
434                let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
435                let code_len = writer.write_pi(value, k).unwrap();
436                assert_eq!(code_len, len_pi(value, k));
437                drop(writer);
438
439                let mut writer =
440                    <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
441                let code_len = writer.write_zeta(value, 1 << k).unwrap();
442                assert_eq!(code_len, len_zeta(value, 1 << k));
443                drop(writer);
444
445                assert_eq!(data_pi[0], data_zeta[0]);
446            }
447        }
448
449        // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
450        for value in 0..100 {
451            let mut data_pi = [0_u64; 10];
452            let mut data_zeta = [0_u64; 10];
453
454            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
455            let code_len = writer.write_pi(value, 0).unwrap();
456            assert_eq!(code_len, len_pi(value, 0));
457            drop(writer);
458
459            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
460            let code_len = writer.write_zeta(value, 1).unwrap();
461            assert_eq!(code_len, len_zeta(value, 1));
462            drop(writer);
463
464            assert_eq!(data_pi[0], data_zeta[0]);
465        }
466
467        for value in 0..100 {
468            assert_eq!(len_pi(value, 1), len_zeta(value, 2));
469        }
470    }
471}