constriction/stream/model/categorical/non_contiguous.rs

use core::{borrow::Borrow, hash::Hash, marker::PhantomData};

#[cfg(feature = "std")]
use std::collections::{
    hash_map::Entry::{Occupied, Vacant},
    HashMap,
};

#[cfg(not(feature = "std"))]
use hashbrown::hash_map::{
    Entry::{Occupied, Vacant},
    HashMap,
};

use alloc::{boxed::Box, vec::Vec};
use num_traits::{float::FloatCore, AsPrimitive};

use crate::{wrapping_pow2, BitArray, NonZeroBitArray};

use super::{
    super::{DecoderModel, EncoderModel, EntropyModel, IterableEntropyModel},
    accumulate_nonzero_probabilities, fast_quantized_cdf, iter_extended_cdf,
    lookup_noncontiguous::NonContiguousLookupDecoderModel,
    perfectly_quantized_probabilities,
};

/// Type alias for a typical [`NonContiguousCategoricalEncoderModel`].
///
/// See:
/// - [`NonContiguousCategoricalEncoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type DefaultNonContiguousCategoricalEncoderModel<Symbol> =
    NonContiguousCategoricalEncoderModel<Symbol, u32, 24>;

/// Type alias for a [`NonContiguousCategoricalEncoderModel`] optimized for compatibility
/// with lookup decoder models.
///
/// See:
/// - [`NonContiguousCategoricalEncoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type SmallNonContiguousCategoricalEncoderModel<Symbol> =
    NonContiguousCategoricalEncoderModel<Symbol, u16, 12>;

/// Type alias for a typical [`NonContiguousCategoricalDecoderModel`].
///
/// See:
/// - [`NonContiguousCategoricalDecoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type DefaultNonContiguousCategoricalDecoderModel<Symbol, Cdf = Vec<(u32, Symbol)>> =
    NonContiguousCategoricalDecoderModel<Symbol, u32, Cdf, 24>;

/// Type alias for a [`NonContiguousCategoricalDecoderModel`] optimized for compatibility
/// with lookup decoder models.
///
/// See:
/// - [`NonContiguousCategoricalDecoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type SmallNonContiguousCategoricalDecoderModel<Symbol, Cdf = Vec<(u16, Symbol)>> =
    NonContiguousCategoricalDecoderModel<Symbol, u16, Cdf, 12>;

/// An entropy model for a categorical probability distribution over arbitrary symbols, for
/// decoding only.
///
/// You will usually want to use this type through one of its type aliases,
/// [`DefaultNonContiguousCategoricalDecoderModel`] or
/// [`SmallNonContiguousCategoricalDecoderModel`], see [discussion of
/// presets](crate::stream#presets).
///
/// This type implements the trait [`DecoderModel`] but not the trait [`EncoderModel`].
/// Thus, you can use a `NonContiguousCategoricalDecoderModel` for *decoding* with any of
/// the stream decoders provided by the `constriction` crate, but not for encoding. If you
/// want to encode data, use a [`NonContiguousCategoricalEncoderModel`] instead. You can
/// convert a `NonContiguousCategoricalDecoderModel` to a
/// `NonContiguousCategoricalEncoderModel` by calling
/// [`to_generic_encoder_model`](IterableEntropyModel::to_generic_encoder_model) on it
/// (you'll have to bring the trait [`IterableEntropyModel`] into scope to do so: `use
/// constriction::stream::model::IterableEntropyModel`).
///
/// # Example
///
/// See [example for
/// `NonContiguousCategoricalEncoderModel`](NonContiguousCategoricalEncoderModel#example).
///
/// # When Should I Use This Type of Entropy Model?
///
/// Use a `NonContiguousCategoricalDecoderModel` for probabilistic models that can *only* be
/// represented as an explicit probability table, and not by some more compact analytic
/// expression.
///
/// - If you have a probability model that can be expressed by some analytical expression
///   (e.g., a [`Binomial`](probability::distribution::Binomial) distribution), then use
///   [`LeakyQuantizer`] instead (unless you want to encode lots of symbols with the same
///   entropy model, in which case the explicitly tabulated representation of a categorical
///   entropy model could improve runtime performance).
/// - If the *support* of your probabilistic model (i.e., the set of symbols to which the
///   model assigns a non-zero probability) is a contiguous range of integers starting at
///   zero, then it is better to use a [`ContiguousCategoricalEntropyModel`]. It has better
///   computational efficiency and it is easier to use since it supports both encoding and
///   decoding with a single type.
/// - If you want to decode only a few symbols with a given probability model, then use a
///   [`LazyContiguousCategoricalEntropyModel`], which will be faster (use an array to map
///   the decoded symbols from the contiguous range `0..N` to whatever noncontiguous
///   alphabet you have). This use case occurs, e.g., in autoregressive models, where each
///   individual model is often used for only exactly one symbol.
/// - If you want to decode lots of symbols with the same entropy model, and if reducing the
///   `PRECISION` to a moderate value is acceptable to you, then you may want to consider
///   using a [`NonContiguousLookupDecoderModel`] instead for even better runtime
///   performance (at the cost of a larger memory footprint and worse compression efficiency
///   due to lower `PRECISION`).
///
/// # Computational Efficiency
///
/// For a probability distribution with a support of `N` symbols, a
/// `NonContiguousCategoricalDecoderModel` has the following asymptotic costs:
///
/// - creation:
///   - runtime cost: `Θ(N log(N))` (when creating with the [`..._fast` constructor]);
///   - memory footprint: `Θ(N)`;
/// - encoding a symbol: not supported; use a [`NonContiguousCategoricalEncoderModel`]
///   instead;
/// - decoding a symbol (calling [`DecoderModel::quantile_function`]):
///   - runtime cost: `Θ(log(N))` (both expected and worst-case);
///   - memory footprint: no heap allocations, constant stack space.
///
/// [`EntropyModel`]: trait.EntropyModel.html
/// [`Encode`]: crate::Encode
/// [`Decode`]: crate::Decode
/// [`HashMap`]: std::collections::HashMap
/// [`ContiguousCategoricalEntropyModel`]:
///     crate::stream::model::ContiguousCategoricalEntropyModel
/// [`NonContiguousLookupDecoderModel`]:
///     crate::stream::model::NonContiguousLookupDecoderModel
/// [`LeakyQuantizer`]: crate::stream::model::LeakyQuantizer
/// [`..._fast` constructor]: Self::from_symbols_and_floating_point_probabilities_fast
/// [`LazyContiguousCategoricalEntropyModel`]:
///     crate::stream::model::LazyContiguousCategoricalEntropyModel
#[derive(Debug, Clone, Copy)]
pub struct NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, const PRECISION: usize> {
    /// Invariants:
    /// - `cdf.len() >= 2` (actually, we currently even guarantee `cdf.len() >= 3` but
    ///   this may be relaxed in the future)
    /// - `cdf[0] == 0`
    /// - `cdf` is monotonically increasing except that it may wrap around only at
    ///   the very last entry (this happens iff `PRECISION == Probability::BITS`).
    ///   Thus, all probabilities within range are guaranteed to be nonzero.
    pub(super) cdf: Cdf,

    pub(super) phantom: PhantomData<(Symbol, Probability)>,
}

impl<Symbol, Probability: BitArray, const PRECISION: usize>
    NonContiguousCategoricalDecoderModel<Symbol, Probability, Vec<(Probability, Symbol)>, PRECISION>
where
    Symbol: Clone,
{
    /// Constructs a leaky distribution (for decoding) over the provided `symbols` whose PMF
    /// approximates given `probabilities`.
    ///
    /// Semantics are analogous to
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`],
    /// except that this constructor has an additional `symbols` argument to provide an
    /// iterator over the symbols in the alphabet (which has to yield exactly
    /// `probabilities.len()` symbols).
    ///
    /// # See also
    ///
    /// - [`from_symbols_and_floating_point_probabilities_perfect`], which can be
    ///   considerably slower but typically approximates the provided `probabilities` *very
    ///   slightly* better.
    ///
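    /// # Example
    ///
    /// A minimal usage sketch (the symbols and probabilities here are made up for
    /// illustration):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalDecoderModel;
    ///
    /// let symbols = ['x', 'y', 'z'];
    /// let probabilities = [0.2f64, 0.5, 0.3];
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         symbols.iter().cloned(),
    ///         &probabilities,
    ///         None, // No precomputed normalization constant.
    ///     )
    ///     .unwrap();
    /// assert_eq!(model.support_size(), 3);
    /// ```
    ///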
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`]:
    ///     crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    ///     Self::from_symbols_and_floating_point_probabilities_perfect
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_fast<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
        normalization: Option<F>,
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + AsPrimitive<Probability>,
        Probability: AsPrimitive<usize>,
        usize: AsPrimitive<Probability> + AsPrimitive<F>,
    {
        let cdf = fast_quantized_cdf::<Probability, F, PRECISION>(probabilities, normalization)?;

        let mut extended_cdf = Vec::with_capacity(probabilities.len() + 1);
        extended_cdf.extend(cdf.zip(symbols));
        let last_symbol = extended_cdf.last().expect("`len` >= 2").1.clone();
        extended_cdf.push((wrapping_pow2(PRECISION), last_symbol));

        Ok(Self::from_extended_cdf(extended_cdf))
    }

    /// Slower variant of [`from_symbols_and_floating_point_probabilities_fast`].
    ///
    /// Similar to [`from_symbols_and_floating_point_probabilities_fast`], but the resulting
    /// (fixed-point precision) model typically approximates the provided floating point
    /// `probabilities` *very slightly* better. Only recommended if compression performance
    /// is *much* more important to you than runtime, as this constructor can be
    /// significantly slower.
    ///
    /// See [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]
    /// for a detailed comparison between `..._fast` and `..._perfect` constructors of
    /// categorical entropy models.
    ///
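    /// # Example
    ///
    /// A minimal usage sketch, analogous to the example on
    /// [`from_symbols_and_floating_point_probabilities_fast`] (made-up probabilities):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalDecoderModel;
    ///
    /// let symbols = ['x', 'y', 'z'];
    /// let probabilities = [0.2f64, 0.5, 0.3];
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_perfect(
    ///         symbols.iter().cloned(),
    ///         &probabilities,
    ///     )
    ///     .unwrap();
    /// assert_eq!(model.support_size(), 3);
    /// ```
    ///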
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    ///     Self::from_symbols_and_floating_point_probabilities_fast
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]:
    ///     crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_perfect<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?;
        Self::from_symbols_and_nonzero_fixed_point_probabilities(
            symbols,
            slots.into_iter().map(|slot| slot.weight),
            false,
        )
    }

    /// Deprecated constructor.
    ///
    /// This constructor has been deprecated in constriction version 0.4.0, and it will be
    /// removed in constriction version 0.5.0.
    ///
    /// # Upgrade Instructions
    ///
    /// Most *new* use cases should call
    /// [`from_symbols_and_floating_point_probabilities_fast`] instead. Using that
    /// constructor (abbreviated as `..._fast` in the following) may lead to very slightly
    /// larger bit rates, but it runs considerably faster.
    ///
    /// However, note that the `..._fast` constructor breaks binary compatibility with
    /// `constriction` version <= 0.3.5. If you need to be able to exchange binary
    /// compressed data with a program that uses a categorical entropy model from
    /// `constriction` version <= 0.3.5, then call
    /// [`from_symbols_and_floating_point_probabilities_perfect`] instead (`..._perfect` for
    /// short). Another reason for using the `..._perfect` constructor could be if
    /// compression performance is *much* more important to you than runtime performance.
    /// See documentation of [`from_symbols_and_floating_point_probabilities_perfect`] for
    /// more information.
    ///
    /// # Compatibility Table
    ///
    /// (In the following table, "encoding" refers to
    /// [`NonContiguousCategoricalEncoderModel`])
    ///
    /// | constructor used for encoding → <br> ↓ constructor used for decoding ↓ | [legacy](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities) |  [`..._perfect`](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_perfect) | [`..._fast`](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_fast) |
    /// | --------------------: | --------------- | --------------- | --------------- |
    /// | **legacy (this one)** | ✅ compatible   | ✅ compatible   | ❌ incompatible |
    /// | **[`..._perfect`]**   | ✅ compatible   | ✅ compatible   | ❌ incompatible |
    /// | **[`..._fast`]**      | ❌ incompatible | ❌ incompatible | ✅ compatible   |
    ///
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    ///     Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`..._perfect`]: Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    ///     Self::from_symbols_and_floating_point_probabilities_fast
    /// [`..._fast`]: Self::from_symbols_and_floating_point_probabilities_fast
    #[deprecated(
        since = "0.4.0",
        note = "Please use `from_symbols_and_floating_point_probabilities_fast` or \
        `from_symbols_and_floating_point_probabilities_perfect` instead. See documentation for \
        detailed upgrade instructions."
    )]
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities<F>(
        symbols: &[Symbol],
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        Self::from_symbols_and_floating_point_probabilities_perfect(
            symbols.iter().cloned(),
            probabilities,
        )
    }

    /// Constructs a distribution with a PMF given in fixed point arithmetic.
    ///
    /// This is a low-level method that allows, e.g., reconstructing a probability
    /// distribution previously exported with [`symbol_table`]. The more common way to
    /// construct a `NonContiguousCategoricalDecoderModel` is via
    /// [`from_symbols_and_floating_point_probabilities_fast`].
    ///
    /// The items of `probabilities` have to be nonzero and smaller than `1 << PRECISION`,
    /// where `PRECISION` is a const generic parameter on the
    /// `NonContiguousCategoricalDecoderModel`.
    ///
    /// If `infer_last_probability` is `false` then `probabilities` must yield the same
    /// number of items as `symbols` does, and the items yielded by `probabilities` have
    /// to (logically) sum up to `1 << PRECISION`. If `infer_last_probability` is `true`
    /// then `probabilities` must yield one fewer item than `symbols`, they must sum up to
    /// a value strictly smaller than `1 << PRECISION`, and the method will assign the
    /// (nonzero) remaining probability to the last symbol.
    ///
    /// # Example
    ///
    /// Creating a `NonContiguousCategoricalDecoderModel` with inferred probability of the
    /// last symbol:
    ///
    /// ```
    /// use constriction::stream::model::{
    ///     DefaultNonContiguousCategoricalDecoderModel, IterableEntropyModel
    /// };
    ///
    /// let partial_probabilities = vec![1u32 << 21, 1 << 22, 1 << 22, 1 << 22];
    /// // `partial_probabilities` sums up to strictly less than `1 << PRECISION` as required:
    /// assert!(partial_probabilities.iter().sum::<u32>() < 1 << 24);
    ///
325    /// let symbols = "abcde"; // Has one more entry than `probabilities`
    ///
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_nonzero_fixed_point_probabilities(
    ///         symbols.chars(), &partial_probabilities, true).unwrap();
    /// let symbol_table = model.floating_point_symbol_table::<f64>().collect::<Vec<_>>();
    /// assert_eq!(
    ///     symbol_table,
    ///     vec![
    ///         ('a', 0.0, 0.125),
    ///         ('b', 0.125, 0.25),
    ///         ('c', 0.375, 0.25),
    ///         ('d', 0.625, 0.25),
    ///         ('e', 0.875, 0.125), // Inferred last probability.
    ///     ]
    /// );
    /// ```
    ///
    /// For more related examples, see
    /// [`ContiguousCategoricalEntropyModel::from_nonzero_fixed_point_probabilities`].
    ///
    /// [`symbol_table`]: IterableEntropyModel::symbol_table
    /// [`fixed_point_probabilities`]: Self::fixed_point_probabilities
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    ///     Self::from_symbols_and_floating_point_probabilities_fast
    /// [`ContiguousCategoricalEntropyModel::from_nonzero_fixed_point_probabilities`]:
    ///     crate::stream::model::ContiguousCategoricalEntropyModel::from_nonzero_fixed_point_probabilities
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_nonzero_fixed_point_probabilities<S, P>(
        symbols: S,
        probabilities: P,
        infer_last_probability: bool,
    ) -> Result<Self, ()>
    where
        S: IntoIterator<Item = Symbol>,
        P: IntoIterator,
        P::Item: Borrow<Probability>,
    {
        let symbols = symbols.into_iter();
        let mut cdf = Vec::with_capacity(symbols.size_hint().0 + 1);
        let mut symbols = accumulate_nonzero_probabilities::<_, _, _, _, _, PRECISION>(
            symbols,
            probabilities.into_iter(),
            |symbol, left_sided_cumulative, _| {
                cdf.push((left_sided_cumulative, symbol));
                Ok(())
            },
            infer_last_probability,
        )?;
        cdf.push((
            wrapping_pow2(PRECISION),
            cdf.last().expect("`symbols` is not empty").1.clone(),
        ));

        if symbols.next().is_some() {
            Err(())
        } else {
            Ok(Self::from_extended_cdf(cdf))
        }
    }

    #[inline(always)]
    fn from_extended_cdf(cdf: Vec<(Probability, Symbol)>) -> Self {
        Self {
            cdf,
            phantom: PhantomData,
        }
    }

    /// Creates a `NonContiguousCategoricalDecoderModel` from any entropy model that
    /// implements [`IterableEntropyModel`].
    ///
    /// Calling `NonContiguousCategoricalDecoderModel::from_iterable_entropy_model(&model)`
    /// is equivalent to calling `model.to_generic_decoder_model()`, where the latter
    /// requires bringing [`IterableEntropyModel`] into scope.
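    ///
    /// # Example
    ///
    /// A small sketch: convert a contiguous categorical model (whose symbols are the
    /// integers `0..3`) into a `NonContiguousCategoricalDecoderModel` (the probabilities
    /// are made up):
    ///
    /// ```
    /// use constriction::stream::model::{
    ///     DefaultContiguousCategoricalEntropyModel,
    ///     DefaultNonContiguousCategoricalDecoderModel,
    /// };
    ///
    /// let contiguous = DefaultContiguousCategoricalEntropyModel
    ///     ::from_floating_point_probabilities_fast(&[0.1f64, 0.4, 0.5], None)
    ///     .unwrap();
    /// let noncontiguous = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_iterable_entropy_model(&contiguous);
    /// assert_eq!(noncontiguous.support_size(), 3);
    /// ```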
    pub fn from_iterable_entropy_model<'m, M>(model: &'m M) -> Self
    where
        M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
    {
        let symbol_table = model.symbol_table();
        let mut cdf = Vec::with_capacity(symbol_table.size_hint().0 + 1);
        cdf.extend(
            symbol_table.map(|(symbol, left_sided_cumulative, _)| (left_sided_cumulative, symbol)),
        );
        cdf.push((
            wrapping_pow2(PRECISION),
            cdf.last().expect("`symbol_table` is not empty").1.clone(),
        ));

        Self {
            cdf,
            phantom: PhantomData,
        }
    }
}

impl<Symbol, Probability, Cdf, const PRECISION: usize>
    NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Symbol: Clone,
    Probability: BitArray,
    Cdf: AsRef<[(Probability, Symbol)]>,
{
    /// Returns the number of symbols supported by the model, i.e., the number of symbols to
    /// which the model assigns a nonzero probability.
    #[inline(always)]
    pub fn support_size(&self) -> usize {
        self.cdf.as_ref().len() - 1
    }

    /// Makes a very cheap shallow copy of the model that can be used much like a shared
    /// reference.
    ///
    /// The returned `NonContiguousCategoricalDecoderModel` implements `Copy`, which is a
    /// requirement for some methods, such as [`Decode::decode_iid_symbols`]. These methods
    /// could also accept a shared reference to a `NonContiguousCategoricalDecoderModel`
    /// (since all references to entropy models are also entropy models, and all shared
    /// references implement `Copy`), but passing a *view* instead may be slightly more
    /// efficient because it avoids one level of dereferencing.
    ///
    /// [`Decode::decode_iid_symbols`]: crate::stream::Decode::decode_iid_symbols
    #[inline]
    pub fn as_view(
        &self,
    ) -> NonContiguousCategoricalDecoderModel<
        Symbol,
        Probability,
        &[(Probability, Symbol)],
        PRECISION,
    > {
        NonContiguousCategoricalDecoderModel {
            cdf: self.cdf.as_ref(),
            phantom: PhantomData,
        }
    }

    /// Creates a [`NonContiguousLookupDecoderModel`] for efficient decoding of i.i.d. data.
    ///
    /// While a `NonContiguousCategoricalDecoderModel` can already be used for decoding
    /// (since it implements [`DecoderModel`]), you may prefer converting it to a
    /// `NonContiguousLookupDecoderModel` first for improved efficiency. Logically, the two
    /// will be equivalent.
    ///
    /// # Warning
    ///
    /// You should only call this method if both of the following conditions are satisfied:
    ///
    /// - `PRECISION` is relatively small (typically `PRECISION == 12`, as in the "Small"
    ///   [preset]) because the memory footprint of a lookup decoder model grows
    ///   exponentially in `PRECISION`; and
    /// - you're about to decode a relatively large number of symbols with the resulting
    ///   model; the conversion to a lookup decoder model bears a significant runtime and
    ///   memory overhead, so if you're going to use the resulting model only for a single
    ///   or a handful of symbols then you'll end up paying more than you gain.
    ///
    /// [preset]: crate::stream#presets
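    ///
    /// # Example
    ///
    /// A sketch of the intended workflow (tiny made-up model; the "Small" preset uses
    /// `PRECISION == 12`, which keeps the lookup table at a modest size):
    ///
    /// ```
    /// use constriction::stream::model::{
    ///     DecoderModel, SmallNonContiguousCategoricalDecoderModel,
    /// };
    ///
    /// let model = SmallNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         ['a', 'b'].iter().cloned(),
    ///         &[0.5f64, 0.5],
    ///         None,
    ///     )
    ///     .unwrap();
    /// let lookup_model = model.to_lookup_decoder_model();
    ///
    /// // Both models implement `DecoderModel` and decode identically.
    /// assert_eq!(lookup_model.quantile_function(100).0, model.quantile_function(100).0);
    /// ```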
    #[inline(always)]
    pub fn to_lookup_decoder_model(
        &self,
    ) -> NonContiguousLookupDecoderModel<
        Symbol,
        Probability,
        Vec<(Probability, Symbol)>,
        Box<[Probability]>,
        PRECISION,
    >
    where
        Probability: Into<usize>,
        usize: AsPrimitive<Probability>,
    {
        self.into()
    }
}

impl<Symbol, Probability, Cdf, const PRECISION: usize> EntropyModel<PRECISION>
    for NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Probability: BitArray,
{
    type Symbol = Symbol;
    type Probability = Probability;
}

impl<'m, Symbol, Probability, Cdf, const PRECISION: usize> IterableEntropyModel<'m, PRECISION>
    for NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Symbol: Clone + 'm,
    Probability: BitArray,
    Cdf: AsRef<[(Probability, Symbol)]>,
{
    #[inline(always)]
    fn symbol_table(
        &'m self,
    ) -> impl Iterator<
        Item = (
            Self::Symbol,
            Self::Probability,
            <Self::Probability as BitArray>::NonZero,
        ),
    > {
        iter_extended_cdf(self.cdf.as_ref().iter().cloned())
    }

    fn floating_point_symbol_table<F>(&'m self) -> impl Iterator<Item = (Self::Symbol, F, F)>
    where
        F: FloatCore + From<Self::Probability> + 'm,
        Self::Probability: Into<F>,
    {
        // This gets compiled into a constant, and the divisions by `whole` get compiled
        // into floating point multiplications rather than (slower) divisions.
        let whole = (F::one() + F::one()) * (Self::Probability::one() << (PRECISION - 1)).into();

        self.symbol_table()
            .map(move |(symbol, cumulative, probability)| {
                (
                    symbol,
                    cumulative.into() / whole,
                    probability.get().into() / whole,
                )
            })
    }

    fn entropy_base2<F>(&'m self) -> F
    where
        F: num_traits::Float + core::iter::Sum,
        Self::Probability: Into<F>,
    {
        let entropy_scaled = self
            .symbol_table()
            .map(|(_, _, probability)| {
                let probability = probability.get().into();
                probability * probability.log2() // probability is guaranteed to be nonzero.
            })
            .sum::<F>();

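        // Derivation: with fixed-point weights `w_i` and `whole = 2^PRECISION`, the true
        // probabilities are `p_i = w_i / whole`, so
        // `H = -Σ p_i log2(p_i) = PRECISION - (Σ w_i log2(w_i)) / whole`,
        // where the sum in the numerator is the `entropy_scaled` computed above.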
        let whole = (F::one() + F::one()) * (Self::Probability::one() << (PRECISION - 1)).into();
        F::from(PRECISION).unwrap() - entropy_scaled / whole
    }

    fn to_generic_encoder_model(
        &'m self,
    ) -> NonContiguousCategoricalEncoderModel<Self::Symbol, Self::Probability, PRECISION>
    where
        Self::Symbol: core::hash::Hash + Eq,
    {
        self.into()
    }

    fn to_generic_decoder_model(
        &'m self,
    ) -> NonContiguousCategoricalDecoderModel<
        Self::Symbol,
        Self::Probability,
        Vec<(Self::Probability, Self::Symbol)>,
        PRECISION,
    >
    where
        Self::Symbol: Clone,
    {
        self.into()
    }

    fn to_generic_lookup_decoder_model(
        &'m self,
    ) -> NonContiguousLookupDecoderModel<
        Self::Symbol,
        Self::Probability,
        Vec<(Self::Probability, Self::Symbol)>,
        Box<[Self::Probability]>,
        PRECISION,
    >
    where
        Self::Probability: Into<usize>,
        usize: AsPrimitive<Self::Probability>,
        Self::Symbol: Clone + Default,
    {
        self.into()
    }
}

impl<Symbol, Probability, Cdf, const PRECISION: usize> DecoderModel<PRECISION>
    for NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Symbol: Clone,
    Probability: BitArray,
    Cdf: AsRef<[(Probability, Symbol)]>,
{
    #[inline(always)]
    fn quantile_function(
        &self,
        quantile: Self::Probability,
    ) -> (Symbol, Probability, Probability::NonZero) {
        let cdf = self.cdf.as_ref();
        // SAFETY: `cdf` is not empty.
        let monotonic_part_of_cdf = unsafe { cdf.get_unchecked(..cdf.len() - 1) };
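        // Binary-search for the insertion point of `quantile`: the comparator below never
        // returns `Equal`, so the search always "fails" and hands us the index of the
        // first entry whose left-sided cumulative exceeds `quantile` (or the index of the
        // extended entry if there is none). The decoded symbol is the entry just before it.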
        let Err(next_index) = monotonic_part_of_cdf.binary_search_by(|(cumulative, _symbol)| {
            if *cumulative <= quantile {
                core::cmp::Ordering::Less
            } else {
                core::cmp::Ordering::Greater
            }
        }) else {
            // SAFETY: our search criterion never returns `Equal`, so the search cannot succeed.
            unsafe { core::hint::unreachable_unchecked() }
        };

        // SAFETY:
        // - `next_index < cdf.len()` because we searched only within `monotonic_part_of_cdf`, which
        //   is one element shorter than `cdf`. Thus `cdf.get_unchecked(next_index)` is sound.
        // - `next_index > 0` because `cdf[0] == 0` and our search goes right on equality; thus,
        //   `next_index - 1` does not wrap around, and so `next_index - 1` is also within bounds.
        let (right_cumulative, (left_cumulative, symbol)) = unsafe {
            (
                cdf.get_unchecked(next_index).0,
                cdf.get_unchecked(next_index - 1).clone(),
            )
        };

        // SAFETY: our constructors don't allow zero probabilities.
        let probability = unsafe {
            right_cumulative
                .wrapping_sub(&left_cumulative)
                .into_nonzero_unchecked()
        };

        (symbol, left_cumulative, probability)
    }
}

654
655impl<'m, Symbol, Probability, M, const PRECISION: usize> From<&'m M>
656    for NonContiguousCategoricalDecoderModel<
657        Symbol,
658        Probability,
659        Vec<(Probability, Symbol)>,
660        PRECISION,
661    >
662where
663    Symbol: Clone,
664    Probability: BitArray,
665    M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
666{
667    #[inline(always)]
668    fn from(model: &'m M) -> Self {
669        Self::from_iterable_entropy_model(model)
670    }
671}
672
673/// An entropy model for a categorical probability distribution over arbitrary symbols, for
674/// encoding only.
675///
676/// You will usually want to use this type through one of its type aliases,
677/// [`DefaultNonContiguousCategoricalEncoderModel`] or
678/// [`SmallNonContiguousCategoricalEncoderModel`], see [discussion of
679/// presets](crate::stream#presets).
680///
681/// This type implements the trait [`EncoderModel`] but not the trait [`DecoderModel`].
682/// Thus, you can use a `NonContiguousCategoricalEncoderModel` for *encoding* with any of
683/// the stream encoders provided by the `constriction` crate, but not for decoding. If you
684/// want to decode data, use a [`NonContiguousCategoricalDecoderModel`] instead.
685///
686/// # Example
687///
688/// ```
689/// use constriction::{
690///     stream::{stack::DefaultAnsCoder, Decode},
691///     stream::model::DefaultNonContiguousCategoricalEncoderModel,
692///     stream::model::DefaultNonContiguousCategoricalDecoderModel,
693///     UnwrapInfallible,
694/// };
695///
/// // Create a `NonContiguousCategoricalEncoderModel` that approximates floating point probabilities.
/// let alphabet = ['M', 'i', 's', 'p', '!'];
/// let probabilities = [0.09, 0.36, 0.36, 0.18, 0.0];
/// let encoder_model = DefaultNonContiguousCategoricalEncoderModel
///     ::from_symbols_and_floating_point_probabilities_fast(
///         alphabet.iter().cloned(),
///         &probabilities,
///         None
///     )
///     .unwrap();
/// assert_eq!(encoder_model.support_size(), 5); // `encoder_model` supports 5 symbols.
///
/// // Use `encoder_model` for entropy coding.
/// let message = "Mississippi!";
/// let mut ans_coder = DefaultAnsCoder::new();
/// ans_coder.encode_iid_symbols_reverse(message.chars(), &encoder_model).unwrap();
/// // Note that `message` contains the symbol '!', which has zero probability under our
/// // floating-point model. However, we can still encode the symbol because the
/// // `NonContiguousCategoricalEncoderModel` is "leaky", i.e., it assigns a nonzero
/// // probability to all symbols that we provided to the constructor.
///
/// // Create a matching `decoder_model`, decode the encoded message, and verify correctness.
/// let decoder_model = DefaultNonContiguousCategoricalDecoderModel
///     ::from_symbols_and_floating_point_probabilities_fast(
///         &alphabet, &probabilities, None
///     )
///     .unwrap();
///
/// // We could pass `decoder_model` by reference (like we did for `encoder_model` above) but
/// // passing `decoder_model.as_view()` is slightly more efficient.
/// let decoded = ans_coder
///     .decode_iid_symbols(12, decoder_model.as_view())
///     .collect::<Result<String, _>>()
///     .unwrap_infallible();
/// assert_eq!(decoded, message);
/// assert!(ans_coder.is_empty());
///
/// // The `encoder_model` assigns zero probability to any symbols that were not provided to its
/// // constructor, so trying to encode a message that contains such a symbol will fail.
/// assert!(ans_coder.encode_iid_symbols_reverse("Mix".chars(), &encoder_model).is_err())
/// // ERROR: symbol 'x' is not in the support of `encoder_model`.
/// ```
///
/// # When Should I Use This Type of Entropy Model?
///
/// Use a `NonContiguousCategoricalEncoderModel` for probabilistic models that can *only* be
/// represented as an explicit probability table, and not by some more compact analytic
/// expression.
///
/// - If you have a probability model that can be expressed by some analytical expression
///   (e.g., a [`Binomial`](probability::distribution::Binomial) distribution), then use
///   [`LeakyQuantizer`] instead (unless you want to encode lots of symbols with the same
///   entropy model, in which case the explicitly tabulated representation of a categorical
///   entropy model could improve runtime performance).
/// - If the *support* of your probabilistic model (i.e., the set of symbols to which the
///   model assigns a non-zero probability) is a contiguous range of integers starting at
///   zero, then it is better to use a [`ContiguousCategoricalEntropyModel`]. It has better
///   computational efficiency and it is easier to use since it supports both encoding and
///   decoding with a single type.
/// - If you want to encode only a few symbols with a given probability model, then use a
///   [`LazyContiguousCategoricalEntropyModel`], which will be faster (use a `HashMap` to
///   first map from your noncontiguous support to indices in a contiguous range `0..N`,
///   where `N` is the size of your support). This use case occurs, e.g., in autoregressive
///   models, where each individual model is often used for only exactly one symbol.
///
/// # Computational Efficiency
///
/// For a probability distribution with a support of `N` symbols, a
/// `NonContiguousCategoricalEncoderModel` has the following asymptotic costs:
///
/// - creation:
///   - runtime cost: `Θ(N log(N))` (when creating with the [`..._fast` constructor]);
///   - memory footprint: `Θ(N)`;
/// - encoding a symbol (calling [`EncoderModel::left_cumulative_and_probability`]):
///   - expected runtime cost: `Θ(1)` (worst case can be more expensive, uses a `HashMap`
///     under the hood);
///   - memory footprint: no heap allocations, constant stack space;
/// - decoding a symbol: not supported; use a [`NonContiguousCategoricalDecoderModel`].
///
/// [`EntropyModel`]: trait.EntropyModel.html
/// [`Encode`]: crate::Encode
/// [`Decode`]: crate::Decode
/// [`HashMap`]: std::collections::HashMap
/// [`ContiguousCategoricalEntropyModel`]: crate::stream::model::ContiguousCategoricalEntropyModel
/// [`LeakyQuantizer`]: crate::stream::model::LeakyQuantizer
/// [`..._fast` constructor]: Self::from_symbols_and_floating_point_probabilities_fast
/// [`LazyContiguousCategoricalEntropyModel`]:
///     crate::stream::model::LazyContiguousCategoricalEntropyModel
#[derive(Debug, Clone)]
pub struct NonContiguousCategoricalEncoderModel<Symbol, Probability, const PRECISION: usize>
where
    Symbol: Hash,
    Probability: BitArray,
{
    table: HashMap<Symbol, (Probability, Probability::NonZero)>,
}

impl<Symbol, Probability, const PRECISION: usize>
    NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash + Eq,
    Probability: BitArray,
{
    /// Constructs a leaky distribution (for encoding) over the provided `symbols` whose PMF
    /// approximates given `probabilities`.
    ///
    /// Semantics are analogous to
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`],
    /// except that this constructor has an additional `symbols` argument to provide an
    /// iterator over the symbols in the alphabet (which has to yield exactly
    /// `probabilities.len()` symbols).
    ///
    /// # See also
    ///
    /// - [`from_symbols_and_floating_point_probabilities_perfect`], which can be
    ///   considerably slower but typically approximates the provided `probabilities` *very
    ///   slightly* better.
    ///
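    /// # Example
    ///
    /// A minimal usage sketch (made-up symbols and probabilities):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalEncoderModel;
    ///
    /// let symbols = ['x', 'y', 'z'];
    /// let probabilities = [0.2f64, 0.5, 0.3];
    /// let model = DefaultNonContiguousCategoricalEncoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         symbols.iter().cloned(),
    ///         &probabilities,
    ///         None, // No precomputed normalization constant.
    ///     )
    ///     .unwrap();
    /// assert_eq!(model.support_size(), 3);
    /// ```
    ///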
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`]:
    ///     crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    ///     Self::from_symbols_and_floating_point_probabilities_perfect
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_fast<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
        normalization: Option<F>,
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + AsPrimitive<Probability>,
        Probability: AsPrimitive<usize>,
        usize: AsPrimitive<Probability> + AsPrimitive<F>,
    {
        let cdf = fast_quantized_cdf::<Probability, F, PRECISION>(probabilities, normalization)?;
        Self::from_symbols_and_cdf(symbols, cdf)
    }

    /// Slower variant of [`from_symbols_and_floating_point_probabilities_fast`].
    ///
    /// Similar to [`from_symbols_and_floating_point_probabilities_fast`], but the resulting
    /// (fixed-point precision) model typically approximates the provided floating point
    /// `probabilities` *very slightly* better. Only recommended if compression performance
    /// is *much* more important to you than runtime, as this constructor can be
    /// significantly slower.
    ///
    /// See [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]
    /// for a detailed comparison between `..._fast` and `..._perfect` constructors of
    /// categorical entropy models.
    ///
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    ///     Self::from_symbols_and_floating_point_probabilities_fast
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]:
    ///     crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_perfect<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?;
        Self::from_symbols_and_nonzero_fixed_point_probabilities(
            symbols,
            slots.into_iter().map(|slot| slot.weight),
            false,
        )
    }

    /// Deprecated constructor.
    ///
    /// This constructor has been deprecated in constriction version 0.4.0, and it will be
    /// removed in constriction version 0.5.0.
    ///
    /// # Upgrade Instructions
    ///
    /// Most *new* use cases should call
    /// [`from_symbols_and_floating_point_probabilities_fast`] instead. Using that
    /// constructor (abbreviated as `..._fast` in the following) may lead to very slightly
    /// larger bit rates, but it runs considerably faster.
    ///
    /// However, note that the `..._fast` constructor breaks binary compatibility with
    /// `constriction` version <= 0.3.5. If you need to be able to exchange binary
    /// compressed data with a program that uses a categorical entropy model from
    /// `constriction` version <= 0.3.5, then call
    /// [`from_symbols_and_floating_point_probabilities_perfect`] instead (`..._perfect` for
    /// short). Another reason for using the `..._perfect` constructor could be if
    /// compression performance is *much* more important to you than runtime performance.
    /// See documentation of [`from_symbols_and_floating_point_probabilities_perfect`] for
    /// more information.
    ///
    /// # Compatibility Table
    ///
    /// (In the following table, "decoding" refers to
    /// [`NonContiguousCategoricalDecoderModel`])
    ///
    /// | constructor used for encoding → <br> ↓ constructor used for decoding ↓ | legacy <br> (this one) |  [`..._perfect`] | [`..._fast`] |
    /// | --------------------: | --------------- | --------------- | --------------- |
    /// | **[legacy](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities)** | ✅ compatible   | ✅ compatible   | ❌ incompatible |
    /// | **[`..._perfect`](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities_perfect)**   | ✅ compatible   | ✅ compatible   | ❌ incompatible |
    /// | **[`..._fast`](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities_fast)**      | ❌ incompatible | ❌ incompatible | ✅ compatible   |
    ///
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    ///     Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`..._perfect`]: Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    ///     Self::from_symbols_and_floating_point_probabilities_fast
    /// [`..._fast`]: Self::from_symbols_and_floating_point_probabilities_fast
    #[deprecated(
        since = "0.4.0",
        note = "Please use `from_symbols_and_floating_point_probabilities_fast` or \
        `from_symbols_and_floating_point_probabilities_perfect` instead. See documentation for \
        detailed upgrade instructions."
    )]
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        Self::from_symbols_and_floating_point_probabilities_perfect(symbols, probabilities)
    }

    /// Constructs a distribution with a PMF given in fixed point arithmetic.
    ///
    /// This method operates logically identically to
    /// [`NonContiguousCategoricalDecoderModel::from_symbols_and_nonzero_fixed_point_probabilities`]
    /// except that it constructs an [`EncoderModel`] rather than a [`DecoderModel`].
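    ///
    /// # Example
    ///
    /// A sketch mirroring the decoder example, querying the resulting model through
    /// [`EncoderModel::left_cumulative_and_probability`] (the probability of the last
    /// symbol is inferred):
    ///
    /// ```
    /// use constriction::stream::model::{
    ///     DefaultNonContiguousCategoricalEncoderModel, EncoderModel,
    /// };
    ///
    /// let symbols = "abcde";
    /// let partial_probabilities = [1u32 << 21, 1 << 22, 1 << 22, 1 << 22];
    /// let model = DefaultNonContiguousCategoricalEncoderModel
    ///     ::from_symbols_and_nonzero_fixed_point_probabilities(
    ///         symbols.chars(), &partial_probabilities, true)
    ///     .unwrap();
    ///
    /// // The probability of 'e' was inferred as `(1 << 24) - (1 << 21) - 3 * (1 << 22)`,
    /// // which equals `1 << 21`.
    /// let (left_cumulative, probability) = model.left_cumulative_and_probability('e').unwrap();
    /// assert_eq!(left_cumulative, (1 << 24) - (1 << 21));
    /// assert_eq!(probability.get(), 1 << 21);
    /// ```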
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_nonzero_fixed_point_probabilities<S, P>(
        symbols: S,
        probabilities: P,
        infer_last_probability: bool,
    ) -> Result<Self, ()>
    where
        S: IntoIterator<Item = Symbol>,
        P: IntoIterator,
        P::Item: Borrow<Probability>,
    {
        let symbols = symbols.into_iter();
        let mut table =
            HashMap::with_capacity(symbols.size_hint().0 + infer_last_probability as usize);
        let mut symbols = accumulate_nonzero_probabilities::<_, _, _, _, _, PRECISION>(
            symbols,
            probabilities.into_iter(),
            |symbol, left_sided_cumulative, probability| match table.entry(symbol) {
                Occupied(_) => Err(()),
                Vacant(slot) => {
                    let probability = probability.into_nonzero().ok_or(())?;
                    slot.insert((left_sided_cumulative, probability));
                    Ok(())
                }
            },
            infer_last_probability,
        )?;

        if symbols.next().is_some() {
            Err(())
        } else {
            Ok(Self { table })
        }
    }

    #[allow(clippy::result_unit_err)]
    fn from_symbols_and_cdf<S, P>(symbols: S, cdf: P) -> Result<Self, ()>
    where
        S: IntoIterator<Item = Symbol>,
        P: IntoIterator<Item = Probability>,
    {
        let mut symbols = symbols.into_iter();
        let mut cdf = cdf.into_iter();
        let mut table = HashMap::with_capacity(symbols.size_hint().0);

        let mut left_cumulative = cdf.next().ok_or(())?;
        for right_cumulative in cdf {
            let symbol = symbols.next().ok_or(())?;
            match table.entry(symbol) {
                Occupied(_) => return Err(()),
                Vacant(slot) => {
                    let probability = (right_cumulative - left_cumulative)
                        .into_nonzero()
                        .ok_or(())?;
                    slot.insert((left_cumulative, probability));
                }
            }
            left_cumulative = right_cumulative;
        }

        let last_symbol = symbols.next().ok_or(())?;
        let right_cumulative = wrapping_pow2::<Probability>(PRECISION);
        match table.entry(last_symbol) {
            Occupied(_) => return Err(()),
            Vacant(slot) => {
                let probability = right_cumulative
                    .wrapping_sub(&left_cumulative)
                    .into_nonzero()
                    .ok_or(())?;
                slot.insert((left_cumulative, probability));
            }
        }

        if symbols.next().is_some() {
            Err(())
        } else {
            Ok(Self { table })
        }
    }

    /// Creates a `NonContiguousCategoricalEncoderModel` from any entropy model that
    /// implements [`IterableEntropyModel`].
    ///
    /// Calling `NonContiguousCategoricalEncoderModel::from_iterable_entropy_model(&model)`
    /// is equivalent to calling `model.to_generic_encoder_model()`, where the latter
    /// requires bringing [`IterableEntropyModel`] into scope.
    pub fn from_iterable_entropy_model<'m, M>(model: &'m M) -> Self
    where
        M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
    {
        let table = model
            .symbol_table()
            .map(|(symbol, left_sided_cumulative, probability)| {
                (symbol, (left_sided_cumulative, probability))
            })
            .collect::<HashMap<_, _>>();
        Self { table }
    }

    /// Returns the number of symbols in the support of the model.
    ///
    /// The support of the model is the set of all symbols that have nonzero probability.
    pub fn support_size(&self) -> usize {
        self.table.len()
    }

    /// Returns the entropy in units of bits (i.e., base 2).
    ///
    /// Similar to [`IterableEntropyModel::entropy_base2`], except that
    /// - this type doesn't implement `IterableEntropyModel` because it doesn't store
    ///   entries in a stable order; and
    /// - because the order in which entries are stored will generally be different on each
    ///   program execution, rounding errors will be slightly different across multiple
    ///   program executions.
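    ///
    /// # Example
    ///
    /// A quick sanity check (sketch): a uniform distribution over four symbols quantizes
    /// exactly and has an entropy of exactly 2 bits.
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalEncoderModel;
    ///
    /// let model = DefaultNonContiguousCategoricalEncoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         ['a', 'b', 'c', 'd'].iter().cloned(),
    ///         &[0.25f64, 0.25, 0.25, 0.25],
    ///         None,
    ///     )
    ///     .unwrap();
    /// let entropy = model.entropy_base2::<f64>();
    /// assert!((entropy - 2.0).abs() < 1e-9);
    /// ```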
    pub fn entropy_base2<F>(&self) -> F
    where
        F: num_traits::Float + core::iter::Sum,
        Probability: Into<F>,
    {
        let entropy_scaled = self
            .table
            .values()
            .map(|&(_, probability)| {
                let probability = probability.get().into();
                probability * probability.log2() // probability is guaranteed to be nonzero.
            })
            .sum::<F>();

        let whole = (F::one() + F::one()) * (Probability::one() << (PRECISION - 1)).into();
        F::from(PRECISION).unwrap() - entropy_scaled / whole
    }
}

impl<'m, Symbol, Probability, M, const PRECISION: usize> From<&'m M>
    for NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash + Eq,
    Probability: BitArray,
    M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
{
    #[inline(always)]
    fn from(model: &'m M) -> Self {
        Self::from_iterable_entropy_model(model)
    }
}

impl<Symbol, Probability: BitArray, const PRECISION: usize> EntropyModel<PRECISION>
    for NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash,
    Probability: BitArray,
{
    type Probability = Probability;
    type Symbol = Symbol;
}

impl<Symbol, Probability: BitArray, const PRECISION: usize> EncoderModel<PRECISION>
    for NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash + Eq,
    Probability: BitArray,
{
    #[inline(always)]
    fn left_cumulative_and_probability(
        &self,
        symbol: impl Borrow<Self::Symbol>,
    ) -> Option<(Self::Probability, Probability::NonZero)> {
        self.table.get(symbol.borrow()).cloned()
    }
}

#[cfg(test)]
mod tests {
    use super::super::super::tests::{test_iterable_entropy_model, verify_iterable_entropy_model};

    use super::*;

    #[test]
    fn non_contiguous_categorical() {
        let hist = [
            1u32, 186545, 237403, 295700, 361445, 433686, 509456, 586943, 663946, 737772, 1657269,
            896675, 922197, 930672, 916665, 0, 0, 0, 0, 0, 723031, 650522, 572300, 494702, 418703,
            347600, 1, 283500, 226158, 178194, 136301, 103158, 76823, 55540, 39258, 27988, 54269,
        ];
        let probabilities = hist.iter().map(|&x| x as f64).collect::<Vec<_>>();
        let symbols = "QWERTYUIOPASDFGHJKLZXCVBNM 1234567890"
            .chars()
            .collect::<Vec<_>>();

        let fast =
            NonContiguousCategoricalDecoderModel::<_, u32, _, 32>::from_symbols_and_floating_point_probabilities_fast(
                symbols.iter().cloned(),
                &probabilities,
                None
            )
            .unwrap();
        test_iterable_entropy_model(&fast, symbols.iter().cloned());
        let kl_fast = verify_iterable_entropy_model(&fast, &hist, 1e-8);

        let perfect =
            NonContiguousCategoricalDecoderModel::<_, u32, _, 32>::from_symbols_and_floating_point_probabilities_perfect(
                symbols.iter().cloned(),
                &probabilities,
            )
            .unwrap();
        test_iterable_entropy_model(&perfect, symbols.iter().cloned());
        let kl_perfect = verify_iterable_entropy_model(&perfect, &hist, 1e-8);

        assert!(kl_perfect < kl_fast);
    }
}