dsi_bitstream/codes/pi.rs
1/*
2 * SPDX-FileCopyrightText: 2024 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8//! Streamlined Apostolico−Drovandi π codes
9//!
10//! The streamlined π code with parameter *k* ≥ 0 of a natural number *n* is the
11//! concatenation of the [Rice code](super::rice) with parameter *k* of
12//! ⌊log₂(*n* + 1)⌋ and of the binary representation of *n* + 1 with the most
13//! significant bit removed.
14//!
15//! The implied distribution of a π code with parameter *k* code is ≈
16//! 1/*x*<sup>1 + 1/2*ᵏ*</sup>.
17//!
18//! Note that π₀ = [ζ₁](super::zeta) = [γ](super::gamma) and π₁ =
19//! [ζ₂](super::zeta). However, due to [subtle problems with
20//! endianness](crate::codes), in the little-endian case π₁ and ζ₂ have the same
21//! codeword lengths but slightly permuted bits.
22//!
23//! In the original paper the definition of the code is very convoluted, as the
24//! authors appear to have missed the connection with [Rice codes](super::rice).
25//! The codewords implemented by this module are equivalent to the ones in the
26//! paper, in the sense that corresponding codewords have the same length, but
27//! the codewords for *k* ≥ 2 are different, and encoding/decoding is
28//! faster—hence the name “streamlined π codes”.
29//!
30//! # References
31//!
32//! Alberto Apostolico and Guido Drovandi. “[Graph Compression by
33//! BFS](https://doi.org/10.3390/a2031031)”, Algorithms, 2:1031-1044, 2009.
34
35use crate::traits::*;
36
37use super::{len_rice, RiceRead, RiceWrite};
38
39/// Return the length of the π code for `n` with parameter `k`.
40///
41/// ```rust
42/// use dsi_bitstream::codes::len_pi;
43///
44/// // k = 0
45/// assert_eq!(len_pi(0, 0), 1, "π_0(0)");
46/// assert_eq!(len_pi(1, 0), 3, "π_0(1)");
47/// assert_eq!(len_pi(2, 0), 3, "π_0(2)");
48/// assert_eq!(len_pi(3, 0), 5, "π_0(3)");
49/// assert_eq!(len_pi(4, 0), 5, "π_0(4)");
50/// assert_eq!(len_pi(5, 0), 5, "π_0(5)");
51/// assert_eq!(len_pi(6, 0), 5, "π_0(6)");
52/// assert_eq!(len_pi(7, 0), 7, "π_0(7)");
53///
54/// // k = 1
55/// assert_eq!(len_pi(0, 1), 2, "π_1(0)");
56/// assert_eq!(len_pi(1, 1), 3, "π_1(1)");
57/// assert_eq!(len_pi(2, 1), 3, "π_1(2)");
58/// assert_eq!(len_pi(3, 1), 5, "π_1(3)");
59/// assert_eq!(len_pi(4, 1), 5, "π_1(4)");
60/// assert_eq!(len_pi(5, 1), 5, "π_1(5)");
61/// assert_eq!(len_pi(6, 1), 5, "π_1(6)");
62/// assert_eq!(len_pi(7, 1), 6, "π_1(7)");
63///
64/// // k = 2
65/// assert_eq!(len_pi(0, 2), 3, "π_2(0)");
66/// assert_eq!(len_pi(1, 2), 4, "π_2(1)");
67/// assert_eq!(len_pi(2, 2), 4, "π_2(2)");
68/// assert_eq!(len_pi(3, 2), 5, "π_2(3)");
69/// assert_eq!(len_pi(4, 2), 5, "π_2(4)");
70/// assert_eq!(len_pi(5, 2), 5, "π_2(5)");
71/// assert_eq!(len_pi(6, 2), 5, "π_2(6)");
72/// assert_eq!(len_pi(7, 2), 6, "π_2(7)");
73///
74/// // k = 3
75/// assert_eq!(len_pi(0, 3), 4, "π_3(0)");
76/// assert_eq!(len_pi(1, 3), 5, "π_3(1)");
77/// assert_eq!(len_pi(2, 3), 5, "π_3(2)");
78/// assert_eq!(len_pi(3, 3), 6, "π_3(3)");
79/// assert_eq!(len_pi(4, 3), 6, "π_3(4)");
80/// assert_eq!(len_pi(5, 3), 6, "π_3(5)");
81/// assert_eq!(len_pi(6, 3), 6, "π_3(6)");
82/// assert_eq!(len_pi(7, 3), 7, "π_3(7)");
83/// ```
84#[must_use]
85#[inline(always)]
86pub fn len_pi(mut n: u64, k: usize) -> usize {
87 n += 1;
88 let λ = n.ilog2() as usize;
89 len_rice(λ as u64, k) + λ
90}
91
92/// Trait for reading π codes.
93///
94/// This is the trait you should pull in scope to read π codes.
95pub trait PiRead<E: Endianness>: BitRead<E> + RiceRead<E> {
96 #[inline(always)]
97 fn read_pi(&mut self, k: usize) -> Result<u64, Self::Error> {
98 let λ = self.read_rice(k)?;
99 Ok((1 << λ) + self.read_bits(λ as usize)? - 1)
100 }
101}
102
103/// Trait for writing π codes.
104///
105/// This is the trait you should pull in scope to write π codes.
106pub trait PiWrite<E: Endianness>: BitWrite<E> + RiceWrite<E> {
107 #[inline(always)]
108 fn write_pi(&mut self, mut n: u64, k: usize) -> Result<usize, Self::Error> {
109 n += 1;
110 let λ = n.ilog2() as usize;
111
112 #[cfg(feature = "checks")]
113 {
114 // Clean up n in case checks are enabled
115 n ^= 1 << λ;
116 }
117
118 Ok(self.write_rice(λ as u64, k)? + self.write_bits(n, λ)?)
119 }
120}
121
122impl<E: Endianness, B: BitRead<E> + RiceRead<E> + ?Sized> PiRead<E> for B {}
123impl<E: Endianness, B: BitWrite<E> + RiceWrite<E> + ?Sized> PiWrite<E> for B {}
124
125#[cfg(test)]
126mod test {
127 use crate::prelude::*;
128
129 #[test]
130 fn test_roundtrip() {
131 let k = 3;
132 for value in (0..64).map(|i| 1 << i).chain(0..1024).chain([u64::MAX - 1]) {
133 let mut data = vec![0_u64];
134 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data));
135 let code_len = writer.write_pi(value, k).unwrap();
136 assert_eq!(code_len, len_pi(value, k));
137 drop(writer);
138 let mut reader = <BufBitReader<BE, _>>::new(MemWordReader::new(&data));
139 assert_eq!(
140 reader.read_pi(k).unwrap(),
141 value,
142 "for value: {} with k {}",
143 value,
144 k
145 );
146 }
147 }
148
149 #[test]
150 #[allow(clippy::unusual_byte_groupings)]
151 fn test_bits() {
152 for (k, value, expected) in [
153 (2, 20, 0b01_00_0101 << (64 - 8)),
154 (2, 0, 0b100 << (64 - 3)),
155 (2, 1, 0b1010 << (64 - 4)),
156 (2, 2, 0b1011 << (64 - 4)),
157 (2, 3, 0b1_1000 << (64 - 5)),
158 (2, 4, 0b1_1001 << (64 - 5)),
159 (2, 5, 0b1_1010 << (64 - 5)),
160 (2, 6, 0b1_1011 << (64 - 5)),
161 (2, 7, 0b11_1000 << (64 - 6)),
162 (3, 0, 0b1000 << (64 - 4)),
163 (3, 1, 0b1_0010 << (64 - 5)),
164 (3, 2, 0b1_0011 << (64 - 5)),
165 (3, 3, 0b1_01000 << (64 - 6)),
166 (3, 4, 0b1_01001 << (64 - 6)),
167 (3, 5, 0b1_01010 << (64 - 6)),
168 (3, 6, 0b1_01011 << (64 - 6)),
169 (3, 7, 0b101_1000 << (64 - 7)),
170 ] {
171 let mut data = vec![0_u64];
172 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data));
173 let code_len = writer.write_pi(value, k).unwrap();
174 assert_eq!(code_len, len_pi(value, k));
175 drop(writer);
176 assert_eq!(
177 data[0].to_be(),
178 expected,
179 "\nfor value: {} with k {}\ngot: {:064b}\nexp: {:064b}\ngot_len: {} exp_len: {}\n",
180 value,
181 k,
182 data[0].to_be(),
183 expected,
184 code_len,
185 len_pi(value, k),
186 );
187 }
188 }
189
190 #[test]
191 fn test_against_zeta() {
192 // BE: π₀ = ζ₁ and π₁ = ζ₂
193 for k in 0..2 {
194 for value in 0..100 {
195 let mut data_pi = vec![0_u64];
196 let mut data_zeta = vec![0_u64];
197
198 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data_pi));
199 let code_len = writer.write_pi(value, k).unwrap();
200 assert_eq!(code_len, len_pi(value, k));
201 drop(writer);
202
203 let mut writer = <BufBitWriter<BE, _>>::new(MemWordWriterVec::new(&mut data_zeta));
204 let code_len = writer.write_zeta(value, 1 << k).unwrap();
205 assert_eq!(code_len, len_zeta(value, 1 << k));
206 drop(writer);
207
208 assert_eq!(data_pi[0], data_zeta[0]);
209 }
210 }
211
212 // LE: π₀ = ζ₁; π₁ and ζ₂ have the same lengths but permuted bits
213 for value in 0..100 {
214 let mut data_pi = vec![0_u64];
215 let mut data_zeta = vec![0_u64];
216
217 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut data_pi));
218 let code_len = writer.write_pi(value, 0).unwrap();
219 assert_eq!(code_len, len_pi(value, 0));
220 drop(writer);
221
222 let mut writer = <BufBitWriter<LE, _>>::new(MemWordWriterVec::new(&mut data_zeta));
223 let code_len = writer.write_zeta(value, 1).unwrap();
224 assert_eq!(code_len, len_zeta(value, 1));
225 drop(writer);
226
227 assert_eq!(data_pi[0], data_zeta[0]);
228 }
229
230 for value in 0..100 {
231 assert_eq!(len_pi(value, 1), len_zeta(value, 2));
232 }
233 }
234}