dsi_bitstream/utils/implied.rs
1/*
2 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5 */
6
7#[cfg(feature = "alloc")]
8use crate::utils::FindChangePoints;
9#[cfg(feature = "alloc")]
10use rand::distr::weighted::WeightedIndex;
11#[cfg(feature = "alloc")]
12use rand::prelude::*;
13
14#[cfg(feature = "alloc")]
15use alloc::vec::Vec;
16
17/// Given the len function of a code, generates data that allows to sample
18/// its implied distribution, i.e. a code-word with length l has
19/// probability 2^(-l).
20///
21/// This code works only with monotonic non decreasing len functions.
22///
23/// Returns two vectors, the first one contains the input values where the
24/// function changes value and the code length at that point. The second
25/// vector contains the probability of each code length.
26///
27/// Since we cannot write more than 64 bits at once, the codes are limited to
28/// 128 bits.
29#[cfg(feature = "alloc")]
30pub fn get_implied_distribution(f: impl Fn(u64) -> usize) -> (Vec<(u64, usize)>, Vec<f64>) {
31 let change_points = FindChangePoints::new(f)
32 .take_while(|(_input, len)| *len <= 128)
33 .collect::<Vec<_>>();
34
35 // convert to len probabilities
36 let probabilities = change_points
37 .windows(2)
38 .map(|window| {
39 let (input, len) = window[0];
40 let (next_input, _next_len) = window[1];
41 let prob = 2.0_f64.powi(-(len as i32));
42 prob * (next_input - input) as f64
43 })
44 .collect::<Vec<_>>();
45 // TODO!: this ignores the last change point
46
47 (change_points, probabilities)
48}
49
50#[derive(Clone, Copy, Debug)]
51/// An infinite iterator that always returns ().
52pub struct InfiniteIterator;
53
54impl Iterator for InfiniteIterator {
55 type Item = ();
56
57 #[inline(always)]
58 fn next(&mut self) -> Option<Self::Item> {
59 Some(())
60 }
61}
62
63/// Returns an **infinite iterator** of samples from the implied distribution of
64/// the given code length function.
65/// The function f should be the len function of the code.
66///
67/// This code works only with monotonic non decreasing len functions and
68/// the codes are limited to 128 bits as we cannot write more than 64 bits at once.
69///
70/// # Example
71///
72/// ```rust
73/// use dsi_bitstream::utils::sample_implied_distribution;
74/// use dsi_bitstream::codes::len_gamma;
75/// use rand::SeedableRng;
76/// use rand::rngs::SmallRng;
77///
78/// let mut rng = SmallRng::seed_from_u64(42);
79/// let vals: Vec<u64> = sample_implied_distribution(len_gamma, &mut rng)
80/// .take(1000).collect::<Vec<_>>();
81///
82/// assert_eq!(vals.len(), 1000);
83/// ```
84#[cfg(feature = "alloc")]
85pub fn sample_implied_distribution(
86 f: impl Fn(u64) -> usize,
87 rng: &mut impl Rng,
88) -> impl Iterator<Item = u64> + '_ {
89 let (change_points, probabilities) = get_implied_distribution(f);
90 let dist = WeightedIndex::new(probabilities).unwrap();
91
92 InfiniteIterator.map(move |_| {
93 // sample a len with the correct probability
94 let idx = dist.sample(rng);
95 // now we sample a random value with the sampled len
96 let (start_input, _len) = change_points[idx];
97 let (end_input, _len) = change_points[idx + 1];
98 rng.random_range(start_input..end_input)
99 })
100}