Skip to main content

dsi_bitstream/codes/
zeta.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9//! Boldi–Vigna ζ codes.
10//!
11//! The ζ code with parameter *k* ≥ 1 of a natural number *n* is the
12//! concatenation of the unary code of *h* = ⌊⌊log₂(*n* + 1)⌋ / *k*⌋ and of
13//! the [minimal binary code](crate::codes::minimal_binary) of *n* + 1 − 2*ʰᵏ*
14//! with 2⁽*ʰ* ⁺ ¹⁾*ᵏ* − 2*ʰᵏ* as upper bound.
15//!
16//! The implied distribution of a ζ code with parameter *k* is ≈ 1/*x*<sup>1 +
17//! 1/*k*</sup>.
18//!
19//! Note that ζ₁ = [π₀](crate::codes::pi) = [γ](crate::codes::gamma) and ζ₂ =
20//! [π₁](crate::codes::pi). However, due to [subtle problems with
21//! endianness](crate::codes), in the little-endian case ζ₂ and π₁ have the same
22//! codeword lengths but slightly permuted bits.
23//!
24//! This module provides a generic implementation of ζ codes, and a specialized
25//! implementation for ζ₃ that may use tables.
26//!
27//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [1 . . 64).
28//!
29//! # References
30//!
31//! Paolo Boldi and Sebastiano Vigna. “[Codes for the World–Wide
32//! Web](https://doi.org/10.1080/15427951.2005.10129113)”. Internet Math.,
33//! 2(4):405–427, 2005.
34
35use super::{MinimalBinaryRead, MinimalBinaryWrite, len_minimal_binary, zeta_tables};
36use crate::traits::*;
37
38/// Returns the length of the ζ code with parameter `k` for `n`.
39#[must_use]
40#[inline(always)]
41#[allow(clippy::collapsible_if)]
42pub fn len_zeta_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
43    debug_assert!(k >= 1);
44    if USE_TABLE {
45        if k == zeta_tables::K {
46            if let Some(idx) = zeta_tables::LEN.get(n as usize) {
47                return *idx as usize;
48            }
49        }
50    }
51    debug_assert!(n < u64::MAX);
52    n += 1;
53    let h = n.ilog2() as usize / k;
54    let l = 1 << (h * k);
55    h + 1 + len_minimal_binary(n - l, (l << k).wrapping_sub(l))
56}
57
58/// Returns the length of the ζ code with parameter `k` for `n` using
59/// a default value for `USE_TABLE`.
60#[must_use]
61#[inline(always)]
62pub fn len_zeta(n: u64, k: usize) -> usize {
63    len_zeta_param::<true>(n, k)
64}
65
66/// Trait for reading ζ codes.
67///
68/// This is the trait you should usually pull into scope to read ζ codes.
69pub trait ZetaRead<E: Endianness>: BitRead<E> {
70    fn read_zeta(&mut self, k: usize) -> Result<u64, Self::Error>;
71    fn read_zeta3(&mut self) -> Result<u64, Self::Error>;
72}
73
74/// Parametric trait for reading ζ codes.
75///
76/// This trait is more general than [`ZetaRead`], as it makes it possible
77/// to specify how to use tables using const parameters.
78///
79/// We provide an implementation of this trait for [`BitRead`]. An implementation
80/// of [`ZetaRead`] using default values is usually provided exploiting the
81/// [`crate::codes::params::ReadParams`] mechanism.
82pub trait ZetaReadParam<E: Endianness>: BitRead<E> {
83    fn read_zeta_param(&mut self, k: usize) -> Result<u64, Self::Error>;
84    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
85}
86
87impl<B: BitRead<BE>> ZetaReadParam<BE> for B {
88    #[inline(always)]
89    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
90        default_read_zeta(self, k)
91    }
92
93    #[inline(always)]
94    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
95        const {
96            if USE_TABLE {
97                zeta_tables::check_read_table(B::PEEK_BITS)
98            }
99        }
100        if USE_TABLE {
101            if let Some((res, _)) = zeta_tables::read_table_be(self) {
102                return Ok(res);
103            }
104        }
105        default_read_zeta(self, 3)
106    }
107}
108
109impl<B: BitRead<LE>> ZetaReadParam<LE> for B {
110    #[inline(always)]
111    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
112        default_read_zeta(self, k)
113    }
114
115    #[inline(always)]
116    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
117        const {
118            if USE_TABLE {
119                zeta_tables::check_read_table(B::PEEK_BITS)
120            }
121        }
122        if USE_TABLE {
123            if let Some((res, _)) = zeta_tables::read_table_le(self) {
124                return Ok(res);
125            }
126        }
127        default_read_zeta(self, 3)
128    }
129}
130
131/// Default, internal non-table based implementation that works
132/// for any endianness.
133#[inline(always)]
134fn default_read_zeta<BO: Endianness, B: BitRead<BO>>(
135    backend: &mut B,
136    k: usize,
137) -> Result<u64, B::Error> {
138    debug_assert!(k >= 1);
139    let h = backend.read_unary()? as usize;
140    debug_assert!(h * k < 64);
141    let l = 1_u64 << (h * k);
142    let res = backend.read_minimal_binary((l << k).wrapping_sub(l))?;
143    Ok(l + res - 1)
144}
145
146/// Trait for writing ζ codes.
147///
148/// This is the trait you should usually pull into scope to write ζ codes.
149pub trait ZetaWrite<E: Endianness>: BitWrite<E> {
150    fn write_zeta(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
151    fn write_zeta3(&mut self, n: u64) -> Result<usize, Self::Error>;
152}
153
154/// Parametric trait for writing ζ codes.
155///
156/// This trait is more general than [`ZetaWrite`], as it makes it possible
157/// to specify how to use tables using const parameters.
158///
159/// We provide an implementation of this trait for [`BitWrite`]. An implementation
160/// of [`ZetaWrite`] using default values is usually provided exploiting the
161/// [`crate::codes::params::WriteParams`] mechanism.
162pub trait ZetaWriteParam<E: Endianness>: BitWrite<E> {
163    fn write_zeta_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
164    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
165}
166
167impl<B: BitWrite<BE>> ZetaWriteParam<BE> for B {
168    #[inline(always)]
169    fn write_zeta_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
170        default_write_zeta(self, n, k)
171    }
172
173    #[inline(always)]
174    #[allow(clippy::collapsible_if)]
175    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
176        if USE_TABLE {
177            if let Some(len) = zeta_tables::write_table_be(self, n)? {
178                return Ok(len);
179            }
180        }
181        default_write_zeta(self, n, 3)
182    }
183}
184
185impl<B: BitWrite<LE>> ZetaWriteParam<LE> for B {
186    #[inline(always)]
187    fn write_zeta_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
188        default_write_zeta(self, n, k)
189    }
190
191    #[inline(always)]
192    #[allow(clippy::collapsible_if)]
193    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
194        if USE_TABLE {
195            if let Some(len) = zeta_tables::write_table_le(self, n)? {
196                return Ok(len);
197            }
198        }
199        default_write_zeta(self, n, 3)
200    }
201}
202
203/// Default, internal non-table based implementation that works
204/// for any endianness.
205#[inline(always)]
206fn default_write_zeta<E: Endianness, B: BitWrite<E>>(
207    backend: &mut B,
208    mut n: u64,
209    k: usize,
210) -> Result<usize, B::Error> {
211    debug_assert!(k >= 1);
212    debug_assert!(n < u64::MAX);
213    n += 1;
214    let h = n.ilog2() as usize / k;
215    let l = 1 << (h * k);
216
217    debug_assert!(l <= n, "{} <= {}", l, n);
218
219    // Write the code
220    Ok(backend.write_unary(h as u64)?
221        + backend.write_minimal_binary(n - l, (l << k).wrapping_sub(l))?)
222}