dsi_bitstream/codes/
zeta.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9//! Boldi−Vigna ζ codes.
10//!
11//! The ζ code with parameter *k* ≥ 1 of a natural number *n* is the
12//! concatenation of of the unary code of *h* = ⌊⌊log₂(*n* + 1)⌋ / *k*⌋ and of
13//! the [minimal binary code](crate::codes::minimal_binary) of *n* + 1 − 2*ʰᵏ*
14//! with 2⁽*ʰ* ⁺ ¹⁾*ᵏ* − 2*ʰᵏ* as upper bound.
15//!
16//! The implied distribution of a ζ code with parameter *k* is ≈ 1/*x*<sup>1 +
17//! 1/*k*</sup>.
18//!
19//! Note that ζ₁ = [π₀](crate::codes::pi) = [γ](crate::codes::gamma) and ζ₂ =
20//! [π₁](crate::codes::pi). However, due to [subtle problems with
21//! endianness](crate::codes), in the little-endian case ζ₂ and π₁ have the same
22//! codeword lengths but slightly permuted bits.
23//!
24//! This module provides a generic implementation of ζ codes, and a specialized
25//! implementation for ζ₃ that may use tables.
26//!
27//! # References
28//!
29//! Paolo Boldi and Sebastiano Vigna. “[Codes for the World−Wide
30//! Web](https://doi.org/10.1080/15427951.2005.10129113)”. Internet Math.,
31//! 2(4):405−427, 2005.
32
33use super::{len_minimal_binary, zeta_tables, MinimalBinaryRead, MinimalBinaryWrite};
34use crate::traits::*;
35
36/// Returns the length of the ζ code with parameter `k` for `n`.
37#[must_use]
38#[inline(always)]
39#[allow(clippy::collapsible_if)]
40pub fn len_zeta_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
41    if USE_TABLE {
42        if k == zeta_tables::K {
43            if let Some(idx) = zeta_tables::LEN.get(n as usize) {
44                return *idx as usize;
45            }
46        }
47    }
48    n += 1;
49    let h = n.ilog2() as usize / k;
50    let l = 1 << (h * k);
51    h + 1 + len_minimal_binary(n - l, (l << k).wrapping_sub(l))
52}
53
54/// Returns the length of the ζ code with parameter `k` for `n` using
55/// a default value for `USE_TABLE`.
56#[inline(always)]
57pub fn len_zeta(n: u64, k: usize) -> usize {
58    len_zeta_param::<true>(n, k)
59}
60
61/// Trait for reading ζ codes.
62///
63/// This is the trait you should usually pull in scope to read ζ codes.
64pub trait ZetaRead<E: Endianness>: BitRead<E> {
65    fn read_zeta(&mut self, k: usize) -> Result<u64, Self::Error>;
66    fn read_zeta3(&mut self) -> Result<u64, Self::Error>;
67}
68
69/// Parametric trait for reading ζ codes.
70///
71/// This trait is is more general than [`ZetaRead`], as it makes it possible
72/// to specify how to use tables using const parameters.
73///
74/// We provide an implementation of this trait for [`BitRead`]. An implementation
75/// of [`ZetaRead`] using default values is usually provided exploiting the
76/// [`crate::codes::params::ReadParams`] mechanism.
77pub trait ZetaReadParam<E: Endianness>: MinimalBinaryRead<E> {
78    fn read_zeta_param(&mut self, k: usize) -> Result<u64, Self::Error>;
79    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
80}
81
82impl<B: BitRead<BE>> ZetaReadParam<BE> for B {
83    #[inline(always)]
84    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
85        default_read_zeta(self, k)
86    }
87
88    #[inline(always)]
89    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
90        if USE_TABLE {
91            if let Some((res, _)) = zeta_tables::read_table_be(self) {
92                return Ok(res);
93            }
94        }
95        default_read_zeta(self, 3)
96    }
97}
98
99impl<B: BitRead<LE>> ZetaReadParam<LE> for B {
100    #[inline(always)]
101    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
102        default_read_zeta(self, k)
103    }
104
105    #[inline(always)]
106    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
107        if USE_TABLE {
108            if let Some((res, _)) = zeta_tables::read_table_le(self) {
109                return Ok(res);
110            }
111        }
112        default_read_zeta(self, 3)
113    }
114}
115
116/// Default, internal non-table based implementation that works
117/// for any endianness.
118#[inline(always)]
119fn default_read_zeta<BO: Endianness, B: BitRead<BO>>(
120    backend: &mut B,
121    k: usize,
122) -> Result<u64, B::Error> {
123    let h = backend.read_unary()? as usize;
124    let l = 1_u64 << (h * k);
125    let res = backend.read_minimal_binary((l << k).wrapping_sub(l))?;
126    Ok(l + res - 1)
127}
128
129/// Trait for writing ζ codes.
130///
131/// This is the trait you should usually pull in scope to write ζ codes.
132pub trait ZetaWrite<E: Endianness>: BitWrite<E> {
133    fn write_zeta(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
134    fn write_zeta3(&mut self, n: u64) -> Result<usize, Self::Error>;
135}
136
137/// Parametric trait for writing ζ codes.
138///
139/// This trait is is more general than [`ZetaWrite`], as it makes it possible
140/// to specify how to use tables using const parameters.
141///
142/// We provide an implementation of this trait for [`BitWrite`]. An implementation
143/// of [`ZetaWrite`] using default values is usually provided exploiting the
144/// [`crate::codes::params::WriteParams`] mechanism.
145pub trait ZetaWriteParam<E: Endianness>: MinimalBinaryWrite<E> {
146    fn write_zeta_param<const USE_TABLE: bool>(
147        &mut self,
148        n: u64,
149        k: usize,
150    ) -> Result<usize, Self::Error>;
151    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
152}
153
154impl<B: BitWrite<BE>> ZetaWriteParam<BE> for B {
155    #[inline(always)]
156    fn write_zeta_param<const USE_TABLE: bool>(
157        &mut self,
158        n: u64,
159        k: usize,
160    ) -> Result<usize, Self::Error> {
161        default_write_zeta(self, n, k)
162    }
163
164    #[inline(always)]
165    #[allow(clippy::collapsible_if)]
166    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
167        if USE_TABLE {
168            if let Some(len) = zeta_tables::write_table_be(self, n)? {
169                return Ok(len);
170            }
171        }
172        default_write_zeta(self, n, 3)
173    }
174}
175
176impl<B: BitWrite<LE>> ZetaWriteParam<LE> for B {
177    #[inline(always)]
178    fn write_zeta_param<const USE_TABLE: bool>(
179        &mut self,
180        n: u64,
181        k: usize,
182    ) -> Result<usize, Self::Error> {
183        default_write_zeta(self, n, k)
184    }
185
186    #[inline(always)]
187    #[allow(clippy::collapsible_if)]
188    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
189        if USE_TABLE {
190            if let Some(len) = zeta_tables::write_table_le(self, n)? {
191                return Ok(len);
192            }
193        }
194        default_write_zeta(self, n, 3)
195    }
196}
197
198/// Default, internal non-table based implementation that works
199/// for any endianness.
200#[inline(always)]
201fn default_write_zeta<E: Endianness, B: BitWrite<E>>(
202    backend: &mut B,
203    mut n: u64,
204    k: usize,
205) -> Result<usize, B::Error> {
206    n += 1;
207    let h = n.ilog2() as usize / k;
208    let l = 1 << (h * k);
209
210    debug_assert!(l <= n, "{} <= {}", l, n);
211
212    // Write the code
213    Ok(backend.write_unary(h as u64)?
214        + backend.write_minimal_binary(n - l, (l << k).wrapping_sub(l))?)
215}