dsi_bitstream/codes/
zeta.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9//! Boldi−Vigna ζ codes.
10//!
11//! The ζ code with parameter *k* ≥ 1 of a natural number *n* is the
12//! concatenation of of the unary code of *h* = ⌊⌊log₂(*n* + 1)⌋ / *k*⌋ and of
13//! the [minimal binary code](crate::codes::minimal_binary) of *n* + 1 − 2*ʰᵏ*
14//! with 2⁽*ʰ* ⁺ ¹⁾*ᵏ* − 2*ʰᵏ* as upper bound.
15//!
16//! The implied distribution of a ζ code with parameter *k* is ≈ 1/*x*<sup>1 +
17//! 1/*k*</sup>.
18//!
19//! Note that ζ₁ = [π₀](crate::codes::pi) = [γ](crate::codes::gamma) and ζ₂ =
20//! [π₁](crate::codes::pi). However, due to [subtle problems with
21//! endianness](crate::codes), in the little-endian case ζ₂ and π₁ have the same
22//! codeword lengths but slightly permuted bits.
23//!
24//! This module provides a generic implementation of ζ codes, and a specialized
25//! implementation for ζ₃ that may use tables.
26//!
27//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [1 . . 64).
28//!
29//! # References
30//!
31//! Paolo Boldi and Sebastiano Vigna. “[Codes for the World−Wide
32//! Web](https://doi.org/10.1080/15427951.2005.10129113)”. Internet Math.,
33//! 2(4):405−427, 2005.
34
35use super::{MinimalBinaryRead, MinimalBinaryWrite, len_minimal_binary, zeta_tables};
36use crate::traits::*;
37
38/// Returns the length of the ζ code with parameter `k` for `n`.
39#[must_use]
40#[inline(always)]
41#[allow(clippy::collapsible_if)]
42pub fn len_zeta_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
43    debug_assert!(k >= 1);
44    if USE_TABLE {
45        if k == zeta_tables::K {
46            if let Some(idx) = zeta_tables::LEN.get(n as usize) {
47                return *idx as usize;
48            }
49        }
50    }
51    debug_assert!(n < u64::MAX);
52    n += 1;
53    let h = n.ilog2() as usize / k;
54    let l = 1 << (h * k);
55    h + 1 + len_minimal_binary(n - l, (l << k).wrapping_sub(l))
56}
57
58/// Returns the length of the ζ code with parameter `k` for `n` using
59/// a default value for `USE_TABLE`.
60#[inline(always)]
61pub fn len_zeta(n: u64, k: usize) -> usize {
62    len_zeta_param::<true>(n, k)
63}
64
65/// Trait for reading ζ codes.
66///
67/// This is the trait you should usually pull in scope to read ζ codes.
68pub trait ZetaRead<E: Endianness>: BitRead<E> {
69    fn read_zeta(&mut self, k: usize) -> Result<u64, Self::Error>;
70    fn read_zeta3(&mut self) -> Result<u64, Self::Error>;
71}
72
73/// Parametric trait for reading ζ codes.
74///
75/// This trait is is more general than [`ZetaRead`], as it makes it possible
76/// to specify how to use tables using const parameters.
77///
78/// We provide an implementation of this trait for [`BitRead`]. An implementation
79/// of [`ZetaRead`] using default values is usually provided exploiting the
80/// [`crate::codes::params::ReadParams`] mechanism.
81pub trait ZetaReadParam<E: Endianness>: MinimalBinaryRead<E> {
82    fn read_zeta_param(&mut self, k: usize) -> Result<u64, Self::Error>;
83    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
84}
85
86impl<B: BitRead<BE>> ZetaReadParam<BE> for B {
87    #[inline(always)]
88    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
89        default_read_zeta(self, k)
90    }
91
92    #[inline(always)]
93    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
94        if USE_TABLE {
95            if let Some((res, _)) = zeta_tables::read_table_be(self) {
96                return Ok(res);
97            }
98        }
99        default_read_zeta(self, 3)
100    }
101}
102
103impl<B: BitRead<LE>> ZetaReadParam<LE> for B {
104    #[inline(always)]
105    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
106        default_read_zeta(self, k)
107    }
108
109    #[inline(always)]
110    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
111        if USE_TABLE {
112            if let Some((res, _)) = zeta_tables::read_table_le(self) {
113                return Ok(res);
114            }
115        }
116        default_read_zeta(self, 3)
117    }
118}
119
120/// Default, internal non-table based implementation that works
121/// for any endianness.
122#[inline(always)]
123fn default_read_zeta<BO: Endianness, B: BitRead<BO>>(
124    backend: &mut B,
125    k: usize,
126) -> Result<u64, B::Error> {
127    debug_assert!(k >= 1);
128    let h = backend.read_unary()? as usize;
129    debug_assert!(h * k < 64);
130    let l = 1_u64 << (h * k);
131    let res = backend.read_minimal_binary((l << k).wrapping_sub(l))?;
132    Ok(l + res - 1)
133}
134
135/// Trait for writing ζ codes.
136///
137/// This is the trait you should usually pull in scope to write ζ codes.
138pub trait ZetaWrite<E: Endianness>: BitWrite<E> {
139    fn write_zeta(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
140    fn write_zeta3(&mut self, n: u64) -> Result<usize, Self::Error>;
141}
142
143/// Parametric trait for writing ζ codes.
144///
145/// This trait is is more general than [`ZetaWrite`], as it makes it possible
146/// to specify how to use tables using const parameters.
147///
148/// We provide an implementation of this trait for [`BitWrite`]. An implementation
149/// of [`ZetaWrite`] using default values is usually provided exploiting the
150/// [`crate::codes::params::WriteParams`] mechanism.
151pub trait ZetaWriteParam<E: Endianness>: MinimalBinaryWrite<E> {
152    fn write_zeta_param<const USE_TABLE: bool>(
153        &mut self,
154        n: u64,
155        k: usize,
156    ) -> Result<usize, Self::Error>;
157    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
158}
159
160impl<B: BitWrite<BE>> ZetaWriteParam<BE> for B {
161    #[inline(always)]
162    fn write_zeta_param<const USE_TABLE: bool>(
163        &mut self,
164        n: u64,
165        k: usize,
166    ) -> Result<usize, Self::Error> {
167        default_write_zeta(self, n, k)
168    }
169
170    #[inline(always)]
171    #[allow(clippy::collapsible_if)]
172    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
173        if USE_TABLE {
174            if let Some(len) = zeta_tables::write_table_be(self, n)? {
175                return Ok(len);
176            }
177        }
178        default_write_zeta(self, n, 3)
179    }
180}
181
182impl<B: BitWrite<LE>> ZetaWriteParam<LE> for B {
183    #[inline(always)]
184    fn write_zeta_param<const USE_TABLE: bool>(
185        &mut self,
186        n: u64,
187        k: usize,
188    ) -> Result<usize, Self::Error> {
189        default_write_zeta(self, n, k)
190    }
191
192    #[inline(always)]
193    #[allow(clippy::collapsible_if)]
194    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
195        if USE_TABLE {
196            if let Some(len) = zeta_tables::write_table_le(self, n)? {
197                return Ok(len);
198            }
199        }
200        default_write_zeta(self, n, 3)
201    }
202}
203
204/// Default, internal non-table based implementation that works
205/// for any endianness.
206#[inline(always)]
207fn default_write_zeta<E: Endianness, B: BitWrite<E>>(
208    backend: &mut B,
209    mut n: u64,
210    k: usize,
211) -> Result<usize, B::Error> {
212    debug_assert!(k >= 1);
213    debug_assert!(n < u64::MAX);
214    n += 1;
215    let h = n.ilog2() as usize / k;
216    let l = 1 << (h * k);
217
218    debug_assert!(l <= n, "{} <= {}", l, n);
219
220    // Write the code
221    Ok(backend.write_unary(h as u64)?
222        + backend.write_minimal_binary(n - l, (l << k).wrapping_sub(l))?)
223}