dsi_bitstream/utils/implied.rs
1/*
2 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5 */
6
7use crate::utils::FindChangePoints;
8
9use rand::distr::weighted::WeightedIndex;
10use rand::prelude::*;
11
12/// Given the len function of a code, generates data that allows to sample
13/// its implied distribution, i.e. a code-word with length l has
14/// probability 2^(-l).
15///
16/// This code works only with monotonic non decreasing len functions.
17///
18/// Returns two vectors, the first one contains the input values where the
19/// function changes value and the code length at that point. The second
20/// vector contains the probability of each code length.
21///
22/// Since we cannot write more than 64 bits at once, the codes are limited to
23/// 128 bits.
24pub fn get_implied_distribution(f: impl Fn(u64) -> usize) -> (Vec<(u64, usize)>, Vec<f64>) {
25 let change_points = FindChangePoints::new(f)
26 .take_while(|(_input, len)| *len <= 128)
27 .collect::<Vec<_>>();
28
29 // convert to len probabilities
30 let probabilities = change_points
31 .windows(2)
32 .map(|window| {
33 let (input, len) = window[0];
34 let (next_input, _next_len) = window[1];
35 let prob = 2.0_f64.powi(-(len as i32));
36 prob * (next_input - input) as f64
37 })
38 .collect::<Vec<_>>();
39 // TODO!: this ignores the last change point
40
41 (change_points, probabilities)
42}
43
44#[derive(Clone, Copy, Debug)]
45/// An infinite iterator that always returns ().
46pub struct InfiniteIterator;
47
48impl Iterator for InfiniteIterator {
49 type Item = ();
50
51 #[inline(always)]
52 fn next(&mut self) -> Option<Self::Item> {
53 Some(())
54 }
55}
56
57/// Returns an **infinite iterator** of samples from the implied distribution of
58/// the given code length function.
59/// The function f should be the len function of the code.
60///
61/// This code works only with monotonic non decreasing len functions and
62/// the codes are limited to 128 bits as we cannot write more than 64 bits at once.
63///
64/// # Example
65///
66/// ```rust
67/// use dsi_bitstream::utils::sample_implied_distribution;
68/// use dsi_bitstream::codes::len_gamma;
69/// use rand::SeedableRng;
70/// use rand::rngs::SmallRng;
71///
72/// let mut rng = SmallRng::seed_from_u64(42);
73/// let vals: Vec<u64> = sample_implied_distribution(len_gamma, &mut rng)
74/// .take(1000).collect::<Vec<_>>();
75///
76/// assert_eq!(vals.len(), 1000);
77/// ```
78pub fn sample_implied_distribution(
79 f: impl Fn(u64) -> usize,
80 rng: &mut impl Rng,
81) -> impl Iterator<Item = u64> + '_ {
82 let (change_points, probabilities) = get_implied_distribution(f);
83 let dist = WeightedIndex::new(probabilities).unwrap();
84
85 InfiniteIterator.map(move |_| {
86 // sample a len with the correct probability
87 let idx = dist.sample(rng);
88 // now we sample a random value with the sampled len
89 let (start_input, _len) = change_points[idx];
90 let (end_input, _len) = change_points[idx + 1];
91 rng.random_range(start_input..end_input)
92 })
93}