Skip to main content

dsi_bitstream/codes/
pi.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico–Drovandi π codes.
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter 2*ᵏ* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! This module provides a generic implementation of π codes, and a specialized
24//! implementation for π₂ that may use tables.
25//!
26//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [0 . . 64).
27//!
28//! In the original paper the definition of the code is very convoluted, as the
29//! authors appear to have missed the connection with [Rice codes](super::rice).
30//! The codewords implemented by this module are equivalent to the ones in the
31//! paper, in the sense that corresponding codewords have the same length, but
32//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
33//! faster—hence the name "streamlined π codes".
34//!
35//! # Table-Based Optimization
36//!
37//! Like [δ](super::delta) codes, π codes use a special optimization for partial
38//! decoding. Due to the structure of π codes (a Rice code followed by fixed bits),
39//! when a complete codeword cannot be read from the table, the table may still
40//! provide partial information about the Rice prefix (λ) that was
41//! successfully decoded.
42//! This partial state is used to directly read the remaining λ fixed bits,
43//! avoiding re-reading the Rice prefix.
44//!
45//! # References
46//!
47//! Alberto Apostolico and Guido Drovandi. "[Graph Compression by
48//! BFS](https://doi.org/10.3390/a2031031)", Algorithms, 2:1031-1044, 2009.
49
50use crate::traits::*;
51
52use super::{RiceRead, RiceWrite, len_rice, pi_tables};
53
54/// Returns the length of the π code with parameter `k` for `n`.
55#[must_use]
56#[inline(always)]
57#[allow(clippy::collapsible_if)]
58pub fn len_pi_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
59    debug_assert!(k < 64);
60    if USE_TABLE {
61        if k == pi_tables::K {
62            // We cannot use .get() here because n is a u64
63            if n < pi_tables::LEN.len() as u64 {
64                return pi_tables::LEN[n as usize] as usize;
65            }
66        }
67    }
68    debug_assert!(n < u64::MAX);
69    n += 1;
70    let λ = n.ilog2() as usize;
71    len_rice(λ as u64, k) + λ
72}
73
74/// Returns the length of the π code for `n` with parameter `k` using
75/// a default value for `USE_TABLE`.
76#[must_use]
77#[inline(always)]
78pub fn len_pi(n: u64, k: usize) -> usize {
79    len_pi_param::<true>(n, k)
80}
81
82/// Trait for reading π codes.
83///
84/// This is the trait you should usually pull into scope to read π codes.
85pub trait PiRead<E: Endianness>: BitRead<E> {
86    fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error>;
87    fn read_pi2(&mut self) -> Result<u64, Self::Error>;
88}
89
90/// Parametric trait for reading π codes.
91///
92/// This trait is more general than [`PiRead`], as it makes it possible
93/// to specify how to use tables using const parameters.
94///
95/// We provide an implementation of this trait for [`BitRead`]. An implementation
96/// of [`PiRead`] using default values is usually provided exploiting the
97/// [`crate::codes::params::ReadParams`] mechanism.
98pub trait PiReadParam<E: Endianness>: BitRead<E> {
99    fn read_pi_param(&mut self, k: usize) -> Result<u64, Self::Error>;
100    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
101}
102
103impl<B: BitRead<BE>> PiReadParam<BE> for B {
104    #[inline(always)]
105    fn read_pi_param(&mut self, k: usize) -> Result<u64, B::Error> {
106        default_read_pi(self, k)
107    }
108
109    #[inline(always)]
110    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
111        const {
112            if USE_TABLE {
113                pi_tables::check_read_table(B::PEEK_BITS)
114            }
115        }
116        if USE_TABLE {
117            let (len_with_flag, value_or_lambda) = pi_tables::read_table_be(self);
118            if len_with_flag > 0 {
119                // Complete code - bits already skipped in read_table
120                return Ok(value_or_lambda);
121            } else if len_with_flag < 0 {
122                // Partial code: rice decoded, need to read fixed part
123                // Bits already skipped in read_table
124                let λ = value_or_lambda;
125                debug_assert!(λ < 64);
126                return Ok((1 << λ) + self.read_bits(λ as usize)? - 1);
127            }
128            // len_with_flag == 0: no valid decoding, fall through
129        }
130        default_read_pi(self, 2)
131    }
132}
133
134impl<B: BitRead<LE>> PiReadParam<LE> for B {
135    #[inline(always)]
136    fn read_pi_param(&mut self, k: usize) -> Result<u64, B::Error> {
137        default_read_pi(self, k)
138    }
139
140    #[inline(always)]
141    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
142        const {
143            if USE_TABLE {
144                pi_tables::check_read_table(B::PEEK_BITS)
145            }
146        }
147        if USE_TABLE {
148            let (len_with_flag, value_or_lambda) = pi_tables::read_table_le(self);
149            if len_with_flag > 0 {
150                // Complete code - bits already skipped in read_table
151                return Ok(value_or_lambda);
152            } else if len_with_flag < 0 {
153                // Partial code: rice decoded, need to read fixed part
154                // Bits already skipped in read_table
155                let λ = value_or_lambda;
156                debug_assert!(λ < 64);
157                return Ok((1 << λ) + self.read_bits(λ as usize)? - 1);
158            }
159            // len_with_flag == 0: no valid decoding, fall through
160        }
161        default_read_pi(self, 2)
162    }
163}
164
165/// Default, internal non-table based implementation that works
166/// for any endianness.
167#[inline(always)]
168fn default_read_pi<E: Endianness, B: BitRead<E>>(
169    backend: &mut B,
170    k: usize,
171) -> Result<u64, B::Error> {
172    debug_assert!(k < 64);
173    let λ = backend.read_rice(k)?;
174    debug_assert!(λ < 64);
175    Ok((1 << λ) + backend.read_bits(λ as usize)? - 1)
176}
177
178/// Trait for writing π codes.
179///
180/// This is the trait you should usually pull into scope to write π codes.
181pub trait PiWrite<E: Endianness>: BitWrite<E> {
182    fn write_pi(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
183    fn write_pi2(&mut self, n: u64) -> Result<usize, Self::Error>;
184}
185
186/// Parametric trait for writing π codes.
187///
188/// This trait is more general than [`PiWrite`], as it makes it possible
189/// to specify how to use tables using const parameters.
190///
191/// We provide an implementation of this trait for [`BitWrite`]. An implementation
192/// of [`PiWrite`] using default values is usually provided exploiting the
193/// [`crate::codes::params::WriteParams`] mechanism.
194pub trait PiWriteParam<E: Endianness>: BitWrite<E> {
195    fn write_pi_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
196    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
197}
198
199impl<B: BitWrite<BE>> PiWriteParam<BE> for B {
200    #[inline(always)]
201    fn write_pi_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
202        default_write_pi(self, n, k)
203    }
204
205    #[inline(always)]
206    #[allow(clippy::collapsible_if)]
207    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
208        if USE_TABLE {
209            if let Some(len) = pi_tables::write_table_be(self, n)? {
210                return Ok(len);
211            }
212        }
213        default_write_pi(self, n, 2)
214    }
215}
216
217impl<B: BitWrite<LE>> PiWriteParam<LE> for B {
218    #[inline(always)]
219    fn write_pi_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
220        default_write_pi(self, n, k)
221    }
222
223    #[inline(always)]
224    #[allow(clippy::collapsible_if)]
225    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
226        if USE_TABLE {
227            if let Some(len) = pi_tables::write_table_le(self, n)? {
228                return Ok(len);
229            }
230        }
231        default_write_pi(self, n, 2)
232    }
233}
234
235/// Default, internal non-table based implementation that works
236/// for any endianness.
237#[inline(always)]
238fn default_write_pi<E: Endianness, B: BitWrite<E>>(
239    backend: &mut B,
240    mut n: u64,
241    k: usize,
242) -> Result<usize, B::Error> {
243    debug_assert!(k < 64);
244    debug_assert!(n < u64::MAX);
245
246    n += 1;
247    let λ = n.ilog2() as usize;
248
249    #[cfg(feature = "checks")]
250    {
251        // Clean up n in case checks are enabled
252        n ^= 1 << λ;
253    }
254
255    Ok(backend.write_rice(λ as u64, k)? + backend.write_bits(n, λ)?)
256}
257
258#[cfg(test)]
259mod tests {
260    use crate::{
261        codes::pi::{PiReadParam, PiWriteParam},
262        prelude::*,
263    };
264
265    #[test]
266    fn test_roundtrip() {
267        let k = 3;
268        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
269            let mut data = [0_u64; 10];
270            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
271            let code_len = writer.write_pi(value, k).unwrap();
272            assert_eq!(code_len, len_pi(value, k));
273            drop(writer);
274            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
275            assert_eq!(
276                reader.read_pi(k).unwrap(),
277                value,
278                "for value: {} with k {}",
279                value,
280                k
281            );
282        }
283    }
284
285    #[test]
286    fn test_roundtrip_pi2() {
287        // Test the specialized pi2 methods
288        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
289            // Test BE
290            let mut data = [0_u64; 10];
291            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
292            let code_len = writer.write_pi2(value).unwrap();
293            assert_eq!(code_len, len_pi(value, 2));
294            drop(writer);
295            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
296            assert_eq!(reader.read_pi2().unwrap(), value, "BE for value: {}", value,);
297
298            // Test LE
299            let mut data = [0_u64; 10];
300            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
301            let code_len = writer.write_pi2(value).unwrap();
302            assert_eq!(code_len, len_pi(value, 2));
303            drop(writer);
304            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new_inf(&data));
305            assert_eq!(reader.read_pi2().unwrap(), value, "LE for value: {}", value,);
306        }
307    }
308
309    #[test]
310    fn test_roundtrip_pi2_param() {
311        // Test the parametric pi2 methods with tables
312        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
313            // Test BE with tables
314            let mut data = [0_u64; 10];
315            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
316            let code_len = writer.write_pi2_param::<true>(value).unwrap();
317            assert_eq!(code_len, len_pi(value, 2));
318            drop(writer);
319            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
320            assert_eq!(
321                reader.read_pi2_param::<true>().unwrap(),
322                value,
323                "BE table for value: {}",
324                value,
325            );
326
327            // Test BE without tables
328            let mut data = [0_u64; 10];
329            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
330            let code_len = writer.write_pi2_param::<false>(value).unwrap();
331            assert_eq!(code_len, len_pi(value, 2));
332            drop(writer);
333            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
334            assert_eq!(
335                reader.read_pi2_param::<false>().unwrap(),
336                value,
337                "BE no table for value: {}",
338                value,
339            );
340
341            // Test LE with tables
342            let mut data = [0_u64; 10];
343            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
344            let code_len = writer.write_pi2_param::<true>(value).unwrap();
345            assert_eq!(code_len, len_pi(value, 2));
346            drop(writer);
347            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new_inf(&data));
348            assert_eq!(
349                reader.read_pi2_param::<true>().unwrap(),
350                value,
351                "LE table for value: {}",
352                value,
353            );
354
355            // Test LE without tables
356            let mut data = [0_u64; 10];
357            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
358            let code_len = writer.write_pi2_param::<false>(value).unwrap();
359            assert_eq!(code_len, len_pi(value, 2));
360            drop(writer);
361            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new_inf(&data));
362            assert_eq!(
363                reader.read_pi2_param::<false>().unwrap(),
364                value,
365                "LE no table for value: {}",
366                value,
367            );
368        }
369    }
370
371    #[test]
372    #[allow(clippy::unusual_byte_groupings)]
373    fn test_bits() {
374        for (k, value, expected) in [
375            (2, 20, 0b01_00_0101 << (64 - 8)),
376            (2, 0, 0b100 << (64 - 3)),
377            (2, 1, 0b1010 << (64 - 4)),
378            (2, 2, 0b1011 << (64 - 4)),
379            (2, 3, 0b1_1000 << (64 - 5)),
380            (2, 4, 0b1_1001 << (64 - 5)),
381            (2, 5, 0b1_1010 << (64 - 5)),
382            (2, 6, 0b1_1011 << (64 - 5)),
383            (2, 7, 0b11_1000 << (64 - 6)),
384            (3, 0, 0b1000 << (64 - 4)),
385            (3, 1, 0b1_0010 << (64 - 5)),
386            (3, 2, 0b1_0011 << (64 - 5)),
387            (3, 3, 0b1_01000 << (64 - 6)),
388            (3, 4, 0b1_01001 << (64 - 6)),
389            (3, 5, 0b1_01010 << (64 - 6)),
390            (3, 6, 0b1_01011 << (64 - 6)),
391            (3, 7, 0b101_1000 << (64 - 7)),
392        ] {
393            let mut data = [0_u64; 10];
394            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
395            let code_len = writer.write_pi(value, k).unwrap();
396            assert_eq!(code_len, len_pi(value, k));
397            drop(writer);
398            assert_eq!(
399                data[0].to_be(),
400                expected,
401                "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
402                value,
403                k,
404                data[0].to_be(),
405                expected,
406                code_len,
407                len_pi(value, k),
408            );
409        }
410    }
411
412    #[test]
413    fn test_against_zeta() {
414        // BE: π₀ = ζ₁ and π₁ = ζ₂
415        for k in 0..2 {
416            for value in 0..100 {
417                let mut data_pi = [0_u64; 10];
418                let mut data_zeta = [0_u64; 10];
419
420                let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
421                let code_len = writer.write_pi(value, k).unwrap();
422                assert_eq!(code_len, len_pi(value, k));
423                drop(writer);
424
425                let mut writer =
426                    <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
427                let code_len = writer.write_zeta(value, 1 << k).unwrap();
428                assert_eq!(code_len, len_zeta(value, 1 << k));
429                drop(writer);
430
431                assert_eq!(data_pi[0], data_zeta[0]);
432            }
433        }
434
435        // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
436        for value in 0..100 {
437            let mut data_pi = [0_u64; 10];
438            let mut data_zeta = [0_u64; 10];
439
440            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
441            let code_len = writer.write_pi(value, 0).unwrap();
442            assert_eq!(code_len, len_pi(value, 0));
443            drop(writer);
444
445            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
446            let code_len = writer.write_zeta(value, 1).unwrap();
447            assert_eq!(code_len, len_zeta(value, 1));
448            drop(writer);
449
450            assert_eq!(data_pi[0], data_zeta[0]);
451        }
452
453        for value in 0..100 {
454            assert_eq!(len_pi(value, 1), len_zeta(value, 2));
455        }
456    }
457}