Skip to main content

dsi_bitstream/codes/
pi.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico–Drovandi π codes.
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter 2*ᵏ* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! This module provides a generic implementation of π codes, and a specialized
24//! implementation for π₂ that may use tables.
25//!
26//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [0 . . 64).
27//!
28//! In the original paper the definition of the code is very convoluted, as the
29//! authors appear to have missed the connection with [Rice codes](super::rice).
30//! The codewords implemented by this module are equivalent to the ones in the
31//! paper, in the sense that corresponding codewords have the same length, but
32//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
33//! faster—hence the name "streamlined π codes".
34//!
35//! # Table-Based Optimization
36//!
37//! Like [δ](super::delta) codes, π codes use a special optimization for partial
38//! decoding. Due to the structure of π codes (a Rice code followed by fixed bits),
39//! when a complete codeword cannot be read from the table, the table may still
40//! provide partial information about the Rice prefix (λ) that was
41//! successfully decoded.
42//! This partial state is used to directly read the remaining λ fixed bits,
43//! avoiding re-reading the Rice prefix.
44//!
45//! # References
46//!
47//! Alberto Apostolico and Guido Drovandi. "[Graph Compression by
48//! BFS](https://doi.org/10.3390/a2031031)", Algorithms, 2:1031-1044, 2009.
49
50use crate::traits::*;
51
52use super::{RiceRead, RiceWrite, len_rice, pi_tables};
53
54/// Returns the length of the π code with parameter `k` for `n`.
55#[must_use]
56#[inline(always)]
57#[allow(clippy::collapsible_if)]
58pub fn len_pi_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
59    debug_assert!(k < 64);
60    if USE_TABLE {
61        if k == pi_tables::K {
62            if let Some(idx) = pi_tables::LEN.get(n as usize) {
63                return *idx as usize;
64            }
65        }
66    }
67    debug_assert!(n < u64::MAX);
68    n += 1;
69    let λ = n.ilog2() as usize;
70    len_rice(λ as u64, k) + λ
71}
72
73/// Returns the length of the π code for `n` with parameter `k` using
74/// a default value for `USE_TABLE`.
75#[must_use]
76#[inline(always)]
77pub fn len_pi(n: u64, k: usize) -> usize {
78    len_pi_param::<true>(n, k)
79}
80
81/// Trait for reading π codes.
82///
83/// This is the trait you should usually pull into scope to read π codes.
84pub trait PiRead<E: Endianness>: BitRead<E> {
85    fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error>;
86    fn read_pi2(&mut self) -> Result<u64, Self::Error>;
87}
88
89/// Parametric trait for reading π codes.
90///
91/// This trait is more general than [`PiRead`], as it makes it possible
92/// to specify how to use tables using const parameters.
93///
94/// We provide an implementation of this trait for [`BitRead`]. An implementation
95/// of [`PiRead`] using default values is usually provided exploiting the
96/// [`crate::codes::params::ReadParams`] mechanism.
97pub trait PiReadParam<E: Endianness>: BitRead<E> {
98    fn read_pi_param(&mut self, k: usize) -> Result<u64, Self::Error>;
99    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
100}
101
102impl<B: BitRead<BE>> PiReadParam<BE> for B {
103    #[inline(always)]
104    fn read_pi_param(&mut self, k: usize) -> Result<u64, B::Error> {
105        default_read_pi(self, k)
106    }
107
108    #[inline(always)]
109    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
110        const {
111            if USE_TABLE {
112                pi_tables::check_read_table(B::PEEK_BITS)
113            }
114        }
115        if USE_TABLE {
116            let (len_with_flag, value_or_lambda) = pi_tables::read_table_be(self);
117            if len_with_flag > 0 {
118                // Complete code - bits already skipped in read_table
119                return Ok(value_or_lambda);
120            } else if len_with_flag < 0 {
121                // Partial code: rice decoded, need to read fixed part
122                // Bits already skipped in read_table
123                let λ = value_or_lambda;
124                debug_assert!(λ < 64);
125                return Ok((1 << λ) + self.read_bits(λ as usize)? - 1);
126            }
127            // len_with_flag == 0: no valid decoding, fall through
128        }
129        default_read_pi(self, 2)
130    }
131}
132
133impl<B: BitRead<LE>> PiReadParam<LE> for B {
134    #[inline(always)]
135    fn read_pi_param(&mut self, k: usize) -> Result<u64, B::Error> {
136        default_read_pi(self, k)
137    }
138
139    #[inline(always)]
140    fn read_pi2_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
141        const {
142            if USE_TABLE {
143                pi_tables::check_read_table(B::PEEK_BITS)
144            }
145        }
146        if USE_TABLE {
147            let (len_with_flag, value_or_lambda) = pi_tables::read_table_le(self);
148            if len_with_flag > 0 {
149                // Complete code - bits already skipped in read_table
150                return Ok(value_or_lambda);
151            } else if len_with_flag < 0 {
152                // Partial code: rice decoded, need to read fixed part
153                // Bits already skipped in read_table
154                let λ = value_or_lambda;
155                debug_assert!(λ < 64);
156                return Ok((1 << λ) + self.read_bits(λ as usize)? - 1);
157            }
158            // len_with_flag == 0: no valid decoding, fall through
159        }
160        default_read_pi(self, 2)
161    }
162}
163
164/// Default, internal non-table based implementation that works
165/// for any endianness.
166#[inline(always)]
167fn default_read_pi<E: Endianness, B: BitRead<E>>(
168    backend: &mut B,
169    k: usize,
170) -> Result<u64, B::Error> {
171    debug_assert!(k < 64);
172    let λ = backend.read_rice(k)?;
173    debug_assert!(λ < 64);
174    Ok((1 << λ) + backend.read_bits(λ as usize)? - 1)
175}
176
177/// Trait for writing π codes.
178///
179/// This is the trait you should usually pull into scope to write π codes.
180pub trait PiWrite<E: Endianness>: BitWrite<E> {
181    fn write_pi(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
182    fn write_pi2(&mut self, n: u64) -> Result<usize, Self::Error>;
183}
184
185/// Parametric trait for writing π codes.
186///
187/// This trait is more general than [`PiWrite`], as it makes it possible
188/// to specify how to use tables using const parameters.
189///
190/// We provide an implementation of this trait for [`BitWrite`]. An implementation
191/// of [`PiWrite`] using default values is usually provided exploiting the
192/// [`crate::codes::params::WriteParams`] mechanism.
193pub trait PiWriteParam<E: Endianness>: BitWrite<E> {
194    fn write_pi_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
195    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
196}
197
198impl<B: BitWrite<BE>> PiWriteParam<BE> for B {
199    #[inline(always)]
200    fn write_pi_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
201        default_write_pi(self, n, k)
202    }
203
204    #[inline(always)]
205    #[allow(clippy::collapsible_if)]
206    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
207        if USE_TABLE {
208            if let Some(len) = pi_tables::write_table_be(self, n)? {
209                return Ok(len);
210            }
211        }
212        default_write_pi(self, n, 2)
213    }
214}
215
216impl<B: BitWrite<LE>> PiWriteParam<LE> for B {
217    #[inline(always)]
218    fn write_pi_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
219        default_write_pi(self, n, k)
220    }
221
222    #[inline(always)]
223    #[allow(clippy::collapsible_if)]
224    fn write_pi2_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
225        if USE_TABLE {
226            if let Some(len) = pi_tables::write_table_le(self, n)? {
227                return Ok(len);
228            }
229        }
230        default_write_pi(self, n, 2)
231    }
232}
233
234/// Default, internal non-table based implementation that works
235/// for any endianness.
236#[inline(always)]
237fn default_write_pi<E: Endianness, B: BitWrite<E>>(
238    backend: &mut B,
239    mut n: u64,
240    k: usize,
241) -> Result<usize, B::Error> {
242    debug_assert!(k < 64);
243    debug_assert!(n < u64::MAX);
244
245    n += 1;
246    let λ = n.ilog2() as usize;
247
248    #[cfg(feature = "checks")]
249    {
250        // Clean up n in case checks are enabled
251        n ^= 1 << λ;
252    }
253
254    Ok(backend.write_rice(λ as u64, k)? + backend.write_bits(n, λ)?)
255}
256
257#[cfg(test)]
258mod tests {
259    use crate::{
260        codes::pi::{PiReadParam, PiWriteParam},
261        prelude::*,
262    };
263
264    #[test]
265    fn test_roundtrip() {
266        let k = 3;
267        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
268            let mut data = [0_u64; 10];
269            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
270            let code_len = writer.write_pi(value, k).unwrap();
271            assert_eq!(code_len, len_pi(value, k));
272            drop(writer);
273            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
274            assert_eq!(
275                reader.read_pi(k).unwrap(),
276                value,
277                "for value: {} with k {}",
278                value,
279                k
280            );
281        }
282    }
283
284    #[test]
285    fn test_roundtrip_pi2() {
286        // Test the specialized pi2 methods
287        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
288            // Test BE
289            let mut data = [0_u64; 10];
290            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
291            let code_len = writer.write_pi2(value).unwrap();
292            assert_eq!(code_len, len_pi(value, 2));
293            drop(writer);
294            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
295            assert_eq!(reader.read_pi2().unwrap(), value, "BE for value: {}", value,);
296
297            // Test LE
298            let mut data = [0_u64; 10];
299            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
300            let code_len = writer.write_pi2(value).unwrap();
301            assert_eq!(code_len, len_pi(value, 2));
302            drop(writer);
303            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new_inf(&data));
304            assert_eq!(reader.read_pi2().unwrap(), value, "LE for value: {}", value,);
305        }
306    }
307
308    #[test]
309    fn test_roundtrip_pi2_param() {
310        // Test the parametric pi2 methods with tables
311        for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
312            // Test BE with tables
313            let mut data = [0_u64; 10];
314            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
315            let code_len = writer.write_pi2_param::<true>(value).unwrap();
316            assert_eq!(code_len, len_pi(value, 2));
317            drop(writer);
318            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
319            assert_eq!(
320                reader.read_pi2_param::<true>().unwrap(),
321                value,
322                "BE table for value: {}",
323                value,
324            );
325
326            // Test BE without tables
327            let mut data = [0_u64; 10];
328            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
329            let code_len = writer.write_pi2_param::<false>(value).unwrap();
330            assert_eq!(code_len, len_pi(value, 2));
331            drop(writer);
332            let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new_inf(&data));
333            assert_eq!(
334                reader.read_pi2_param::<false>().unwrap(),
335                value,
336                "BE no table for value: {}",
337                value,
338            );
339
340            // Test LE with tables
341            let mut data = [0_u64; 10];
342            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
343            let code_len = writer.write_pi2_param::<true>(value).unwrap();
344            assert_eq!(code_len, len_pi(value, 2));
345            drop(writer);
346            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new_inf(&data));
347            assert_eq!(
348                reader.read_pi2_param::<true>().unwrap(),
349                value,
350                "LE table for value: {}",
351                value,
352            );
353
354            // Test LE without tables
355            let mut data = [0_u64; 10];
356            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data));
357            let code_len = writer.write_pi2_param::<false>(value).unwrap();
358            assert_eq!(code_len, len_pi(value, 2));
359            drop(writer);
360            let mut reader = <BufBitReader<LE, _>>::new(MemWordReader::new_inf(&data));
361            assert_eq!(
362                reader.read_pi2_param::<false>().unwrap(),
363                value,
364                "LE no table for value: {}",
365                value,
366            );
367        }
368    }
369
370    #[test]
371    #[allow(clippy::unusual_byte_groupings)]
372    fn test_bits() {
373        for (k, value, expected) in [
374            (2, 20, 0b01_00_0101 << (64 - 8)),
375            (2, 0, 0b100 << (64 - 3)),
376            (2, 1, 0b1010 << (64 - 4)),
377            (2, 2, 0b1011 << (64 - 4)),
378            (2, 3, 0b1_1000 << (64 - 5)),
379            (2, 4, 0b1_1001 << (64 - 5)),
380            (2, 5, 0b1_1010 << (64 - 5)),
381            (2, 6, 0b1_1011 << (64 - 5)),
382            (2, 7, 0b11_1000 << (64 - 6)),
383            (3, 0, 0b1000 << (64 - 4)),
384            (3, 1, 0b1_0010 << (64 - 5)),
385            (3, 2, 0b1_0011 << (64 - 5)),
386            (3, 3, 0b1_01000 << (64 - 6)),
387            (3, 4, 0b1_01001 << (64 - 6)),
388            (3, 5, 0b1_01010 << (64 - 6)),
389            (3, 6, 0b1_01011 << (64 - 6)),
390            (3, 7, 0b101_1000 << (64 - 7)),
391        ] {
392            let mut data = [0_u64; 10];
393            let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data));
394            let code_len = writer.write_pi(value, k).unwrap();
395            assert_eq!(code_len, len_pi(value, k));
396            drop(writer);
397            assert_eq!(
398                data[0].to_be(),
399                expected,
400                "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
401                value,
402                k,
403                data[0].to_be(),
404                expected,
405                code_len,
406                len_pi(value, k),
407            );
408        }
409    }
410
411    #[test]
412    fn test_against_zeta() {
413        // BE: π₀ = ζ₁ and π₁ = ζ₂
414        for k in 0..2 {
415            for value in 0..100 {
416                let mut data_pi = [0_u64; 10];
417                let mut data_zeta = [0_u64; 10];
418
419                let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
420                let code_len = writer.write_pi(value, k).unwrap();
421                assert_eq!(code_len, len_pi(value, k));
422                drop(writer);
423
424                let mut writer =
425                    <BufBitWriter<BE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
426                let code_len = writer.write_zeta(value, 1 << k).unwrap();
427                assert_eq!(code_len, len_zeta(value, 1 << k));
428                drop(writer);
429
430                assert_eq!(data_pi[0], data_zeta[0]);
431            }
432        }
433
434        // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
435        for value in 0..100 {
436            let mut data_pi = [0_u64; 10];
437            let mut data_zeta = [0_u64; 10];
438
439            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_pi));
440            let code_len = writer.write_pi(value, 0).unwrap();
441            assert_eq!(code_len, len_pi(value, 0));
442            drop(writer);
443
444            let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterSlice::new(&mut data_zeta));
445            let code_len = writer.write_zeta(value, 1).unwrap();
446            assert_eq!(code_len, len_zeta(value, 1));
447            drop(writer);
448
449            assert_eq!(data_pi[0], data_zeta[0]);
450        }
451
452        for value in 0..100 {
453            assert_eq!(len_pi(value, 1), len_zeta(value, 2));
454        }
455    }
456}