Skip to main content

dsi_bitstream/codes/
zeta.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9//! Boldi–Vigna ζ codes.
10//!
11//! The ζ code with parameter *k* ≥ 1 of a natural number *n* is the
12//! concatenation of the unary code of *h* = ⌊⌊log₂(*n* + 1)⌋ / *k*⌋ and of
13//! the [minimal binary code](crate::codes::minimal_binary) of *n* + 1 − 2*ʰᵏ*
14//! with 2⁽*ʰ* ⁺ ¹⁾*ᵏ* − 2*ʰᵏ* as upper bound.
15//!
16//! The implied distribution of a ζ code with parameter *k* is ≈ 1/*x*<sup>1 +
17//! 1/*k*</sup>.
18//!
19//! Note that ζ₁ = [π₀](crate::codes::pi) = [γ](crate::codes::gamma) and ζ₂ =
20//! [π₁](crate::codes::pi). However, due to [subtle problems with
21//! endianness](crate::codes), in the little-endian case ζ₂ and π₁ have the same
22//! codeword lengths but slightly permuted bits.
23//!
24//! This module provides a generic implementation of ζ codes, and a specialized
25//! implementation for ζ₃ that may use tables.
26//!
27//! The supported range is [0 . . 2⁶⁴ – 1) for *k* in [1 . . 64).
28//!
29//! # References
30//!
31//! Paolo Boldi and Sebastiano Vigna. “[Codes for the World–Wide
32//! Web](https://doi.org/10.1080/15427951.2005.10129113)”. Internet Math.,
33//! 2(4):405–427, 2005.
34
35use super::{MinimalBinaryRead, MinimalBinaryWrite, len_minimal_binary, zeta_tables};
36use crate::traits::*;
37
38/// Returns the length of the ζ code with parameter `k` for `n`.
39#[must_use]
40#[inline(always)]
41#[allow(clippy::collapsible_if)]
42pub fn len_zeta_param<const USE_TABLE: bool>(mut n: u64, k: usize) -> usize {
43    debug_assert!(k >= 1);
44    if USE_TABLE {
45        if k == zeta_tables::K {
46            // We cannot use .get() here because n is a u64
47            if n < zeta_tables::LEN.len() as u64 {
48                return zeta_tables::LEN[n as usize] as usize;
49            }
50        }
51    }
52    debug_assert!(n < u64::MAX);
53    n += 1;
54    let h = n.ilog2() as usize / k;
55    let l = 1 << (h * k);
56    h + 1 + len_minimal_binary(n - l, (l << k).wrapping_sub(l))
57}
58
59/// Returns the length of the ζ code with parameter `k` for `n` using
60/// a default value for `USE_TABLE`.
61#[must_use]
62#[inline(always)]
63pub fn len_zeta(n: u64, k: usize) -> usize {
64    len_zeta_param::<true>(n, k)
65}
66
67/// Trait for reading ζ codes.
68///
69/// This is the trait you should usually pull into scope to read ζ codes.
70pub trait ZetaRead<E: Endianness>: BitRead<E> {
71    fn read_zeta(&mut self, k: usize) -> Result<u64, Self::Error>;
72    fn read_zeta3(&mut self) -> Result<u64, Self::Error>;
73}
74
75/// Parametric trait for reading ζ codes.
76///
77/// This trait is more general than [`ZetaRead`], as it makes it possible
78/// to specify how to use tables using const parameters.
79///
80/// We provide an implementation of this trait for [`BitRead`]. An implementation
81/// of [`ZetaRead`] using default values is usually provided exploiting the
82/// [`crate::codes::params::ReadParams`] mechanism.
83pub trait ZetaReadParam<E: Endianness>: BitRead<E> {
84    fn read_zeta_param(&mut self, k: usize) -> Result<u64, Self::Error>;
85    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, Self::Error>;
86}
87
88impl<B: BitRead<BE>> ZetaReadParam<BE> for B {
89    #[inline(always)]
90    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
91        default_read_zeta(self, k)
92    }
93
94    #[inline(always)]
95    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
96        const {
97            if USE_TABLE {
98                zeta_tables::check_read_table(B::PEEK_BITS)
99            }
100        }
101        if USE_TABLE {
102            if let Some((res, _)) = zeta_tables::read_table_be(self) {
103                return Ok(res);
104            }
105        }
106        default_read_zeta(self, 3)
107    }
108}
109
110impl<B: BitRead<LE>> ZetaReadParam<LE> for B {
111    #[inline(always)]
112    fn read_zeta_param(&mut self, k: usize) -> Result<u64, B::Error> {
113        default_read_zeta(self, k)
114    }
115
116    #[inline(always)]
117    fn read_zeta3_param<const USE_TABLE: bool>(&mut self) -> Result<u64, B::Error> {
118        const {
119            if USE_TABLE {
120                zeta_tables::check_read_table(B::PEEK_BITS)
121            }
122        }
123        if USE_TABLE {
124            if let Some((res, _)) = zeta_tables::read_table_le(self) {
125                return Ok(res);
126            }
127        }
128        default_read_zeta(self, 3)
129    }
130}
131
132/// Default, internal non-table based implementation that works
133/// for any endianness.
134#[inline(always)]
135fn default_read_zeta<BO: Endianness, B: BitRead<BO>>(
136    backend: &mut B,
137    k: usize,
138) -> Result<u64, B::Error> {
139    debug_assert!(k >= 1);
140    let h = backend.read_unary()? as usize;
141    debug_assert!(h * k < 64);
142    let l = 1_u64 << (h * k);
143    let res = backend.read_minimal_binary((l << k).wrapping_sub(l))?;
144    Ok(l + res - 1)
145}
146
147/// Trait for writing ζ codes.
148///
149/// This is the trait you should usually pull into scope to write ζ codes.
150pub trait ZetaWrite<E: Endianness>: BitWrite<E> {
151    fn write_zeta(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
152    fn write_zeta3(&mut self, n: u64) -> Result<usize, Self::Error>;
153}
154
155/// Parametric trait for writing ζ codes.
156///
157/// This trait is more general than [`ZetaWrite`], as it makes it possible
158/// to specify how to use tables using const parameters.
159///
160/// We provide an implementation of this trait for [`BitWrite`]. An implementation
161/// of [`ZetaWrite`] using default values is usually provided exploiting the
162/// [`crate::codes::params::WriteParams`] mechanism.
163pub trait ZetaWriteParam<E: Endianness>: BitWrite<E> {
164    fn write_zeta_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error>;
165    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error>;
166}
167
168impl<B: BitWrite<BE>> ZetaWriteParam<BE> for B {
169    #[inline(always)]
170    fn write_zeta_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
171        default_write_zeta(self, n, k)
172    }
173
174    #[inline(always)]
175    #[allow(clippy::collapsible_if)]
176    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
177        if USE_TABLE {
178            if let Some(len) = zeta_tables::write_table_be(self, n)? {
179                return Ok(len);
180            }
181        }
182        default_write_zeta(self, n, 3)
183    }
184}
185
186impl<B: BitWrite<LE>> ZetaWriteParam<LE> for B {
187    #[inline(always)]
188    fn write_zeta_param(&mut self, n: u64, k: usize) -> Result<usize, Self::Error> {
189        default_write_zeta(self, n, k)
190    }
191
192    #[inline(always)]
193    #[allow(clippy::collapsible_if)]
194    fn write_zeta3_param<const USE_TABLE: bool>(&mut self, n: u64) -> Result<usize, Self::Error> {
195        if USE_TABLE {
196            if let Some(len) = zeta_tables::write_table_le(self, n)? {
197                return Ok(len);
198            }
199        }
200        default_write_zeta(self, n, 3)
201    }
202}
203
204/// Default, internal non-table based implementation that works
205/// for any endianness.
206#[inline(always)]
207fn default_write_zeta<E: Endianness, B: BitWrite<E>>(
208    backend: &mut B,
209    mut n: u64,
210    k: usize,
211) -> Result<usize, B::Error> {
212    debug_assert!(k >= 1);
213    debug_assert!(n < u64::MAX);
214    n += 1;
215    let h = n.ilog2() as usize / k;
216    let l = 1 << (h * k);
217
218    debug_assert!(l <= n, "{} <= {}", l, n);
219
220    // Write the code
221    Ok(backend.write_unary(h as u64)?
222        + backend.write_minimal_binary(n - l, (l << k).wrapping_sub(l))?)
223}