constriction/stream/model/categorical/non_contiguous.rs
use core::{borrow::Borrow, hash::Hash, marker::PhantomData};

#[cfg(feature = "std")]
use std::collections::{
    hash_map::Entry::{Occupied, Vacant},
    HashMap,
};

#[cfg(not(feature = "std"))]
use hashbrown::hash_map::{
    Entry::{Occupied, Vacant},
    HashMap,
};

use alloc::{boxed::Box, vec::Vec};
use num_traits::{float::FloatCore, AsPrimitive};

use crate::{wrapping_pow2, BitArray, NonZeroBitArray};

use super::{
    super::{DecoderModel, EncoderModel, EntropyModel, IterableEntropyModel},
    accumulate_nonzero_probabilities, fast_quantized_cdf, iter_extended_cdf,
    lookup_noncontiguous::NonContiguousLookupDecoderModel,
    perfectly_quantized_probabilities,
};

/// Type alias for a typical [`NonContiguousCategoricalEncoderModel`].
///
/// See:
/// - [`NonContiguousCategoricalEncoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type DefaultNonContiguousCategoricalEncoderModel<Symbol> =
    NonContiguousCategoricalEncoderModel<Symbol, u32, 24>;

/// Type alias for a [`NonContiguousCategoricalEncoderModel`] optimized for compatibility
/// with lookup decoder models.
///
/// See:
/// - [`NonContiguousCategoricalEncoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type SmallNonContiguousCategoricalEncoderModel<Symbol> =
    NonContiguousCategoricalEncoderModel<Symbol, u16, 12>;

/// Type alias for a typical [`NonContiguousCategoricalDecoderModel`].
///
/// See:
/// - [`NonContiguousCategoricalDecoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type DefaultNonContiguousCategoricalDecoderModel<Symbol, Cdf = Vec<(u32, Symbol)>> =
    NonContiguousCategoricalDecoderModel<Symbol, u32, Cdf, 24>;

/// Type alias for a [`NonContiguousCategoricalDecoderModel`] optimized for compatibility
/// with lookup decoder models.
///
/// See:
/// - [`NonContiguousCategoricalDecoderModel`]
/// - [discussion of presets](crate::stream#presets)
pub type SmallNonContiguousCategoricalDecoderModel<Symbol, Cdf = Vec<(u16, Symbol)>> =
    NonContiguousCategoricalDecoderModel<Symbol, u16, Cdf, 12>;

/// An entropy model for a categorical probability distribution over arbitrary symbols, for
/// decoding only.
///
/// You will usually want to use this type through one of its type aliases,
/// [`DefaultNonContiguousCategoricalDecoderModel`] or
/// [`SmallNonContiguousCategoricalDecoderModel`], see [discussion of
/// presets](crate::stream#presets).
///
/// This type implements the trait [`DecoderModel`] but not the trait [`EncoderModel`].
/// Thus, you can use a `NonContiguousCategoricalDecoderModel` for *decoding* with any of
/// the stream decoders provided by the `constriction` crate, but not for encoding. If you
/// want to encode data, use a [`NonContiguousCategoricalEncoderModel`] instead. You can
/// convert a `NonContiguousCategoricalDecoderModel` to a
/// `NonContiguousCategoricalEncoderModel` by calling
/// [`to_generic_encoder_model`](IterableEntropyModel::to_generic_encoder_model) on it
/// (you'll have to bring the trait [`IterableEntropyModel`] into scope to do so: `use
/// constriction::stream::model::IterableEntropyModel`).
///
/// # Example
///
/// See [example for
/// `NonContiguousCategoricalEncoderModel`](NonContiguousCategoricalEncoderModel#example).
///
/// # When Should I Use This Type of Entropy Model?
///
/// Use a `NonContiguousCategoricalDecoderModel` for probabilistic models that can *only* be
/// represented as an explicit probability table, and not by some more compact analytic
/// expression.
///
/// - If you have a probability model that can be expressed by some analytical expression
///   (e.g., a [`Binomial`](probability::distribution::Binomial) distribution), then use
///   [`LeakyQuantizer`] instead (unless you want to encode lots of symbols with the same
///   entropy model, in which case the explicitly tabulated representation of a categorical
///   entropy model could improve runtime performance).
/// - If the *support* of your probabilistic model (i.e., the set of symbols to which the
///   model assigns a non-zero probability) is a contiguous range of integers starting at
///   zero, then it is better to use a [`ContiguousCategoricalEntropyModel`]. It has better
///   computational efficiency and it is easier to use since it supports both encoding and
///   decoding with a single type.
/// - If you want to decode only a few symbols with a given probability model, then use a
///   [`LazyContiguousCategoricalEntropyModel`], which will be faster (use an array to map
///   the decoded symbols from the contiguous range `0..N` to whatever noncontiguous
///   alphabet you have). This use case occurs, e.g., in autoregressive models, where each
///   individual model is often used for only exactly one symbol.
/// - If you want to decode lots of symbols with the same entropy model, and if reducing the
///   `PRECISION` to a moderate value is acceptable to you, then you may want to consider
///   using a [`NonContiguousLookupDecoderModel`] instead for even better runtime
///   performance (at the cost of a larger memory footprint and worse compression efficiency
///   due to lower `PRECISION`).
///
/// # Computational Efficiency
///
/// For a probability distribution with a support of `N` symbols, a
/// `NonContiguousCategoricalDecoderModel` has the following asymptotic costs:
///
/// - creation:
///   - runtime cost: `Θ(N log(N))` (when creating with the [`..._fast` constructor]);
///   - memory footprint: `Θ(N)`;
/// - encoding a symbol: not supported; use a [`NonContiguousCategoricalEncoderModel`]
///   instead.
/// - decoding a symbol (calling [`DecoderModel::quantile_function`]):
///   - runtime cost: `Θ(log(N))` (both expected and worst-case);
///   - memory footprint: no heap allocations, constant stack space.
///
/// [`EntropyModel`]: trait.EntropyModel.html
/// [`Encode`]: crate::Encode
/// [`Decode`]: crate::Decode
/// [`HashMap`]: std::collections::HashMap
/// [`ContiguousCategoricalEntropyModel`]:
/// crate::stream::model::ContiguousCategoricalEntropyModel
/// [`NonContiguousLookupDecoderModel`]:
/// crate::stream::model::NonContiguousLookupDecoderModel
/// [`LeakyQuantizer`]: crate::stream::model::LeakyQuantizer
/// [`..._fast` constructor]: Self::from_symbols_and_floating_point_probabilities_fast
/// [`LazyContiguousCategoricalEntropyModel`]:
/// crate::stream::model::LazyContiguousCategoricalEntropyModel
#[derive(Debug, Clone, Copy)]
pub struct NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, const PRECISION: usize> {
    /// Invariants:
    /// - `cdf.len() >= 2` (actually, we currently even guarantee `cdf.len() >= 3` but
    ///   this may be relaxed in the future)
    /// - `cdf[0] == 0`
    /// - `cdf` is monotonically increasing except that it may wrap around only at
    ///   the very last entry (this happens iff `PRECISION == Probability::BITS`).
    ///   Thus, all probabilities within range are guaranteed to be nonzero.
    pub(super) cdf: Cdf,

    pub(super) phantom: PhantomData<(Symbol, Probability)>,
}

impl<Symbol, Probability: BitArray, const PRECISION: usize>
    NonContiguousCategoricalDecoderModel<Symbol, Probability, Vec<(Probability, Symbol)>, PRECISION>
where
    Symbol: Clone,
{
    /// Constructs a leaky distribution (for decoding) over the provided `symbols` whose PMF
    /// approximates given `probabilities`.
    ///
    /// Semantics are analogous to
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`],
    /// except that this constructor has an additional `symbols` argument to provide an
    /// iterator over the symbols in the alphabet (which has to yield exactly
    /// `probabilities.len()` symbols).
    ///
    /// # See also
    ///
    /// - [`from_symbols_and_floating_point_probabilities_perfect`], which can be
    ///   considerably slower but typically approximates the provided `probabilities` *very
    ///   slightly* better.
    ///
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`]:
    /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    /// Self::from_symbols_and_floating_point_probabilities_perfect
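    ///
    /// # Example
    ///
    /// A minimal sketch (the alphabet and probabilities below are made up for
    /// illustration):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalDecoderModel;
    ///
    /// let symbols = ['a', 'x', 'z'];
    /// let probabilities = [0.5, 0.3, 0.2];
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         symbols.iter().cloned(),
    ///         &probabilities,
    ///         None, // Let the constructor compute the normalization itself.
    ///     )
    ///     .unwrap();
    /// assert_eq!(model.support_size(), 3);
    /// ```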
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_fast<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
        normalization: Option<F>,
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + AsPrimitive<Probability>,
        Probability: AsPrimitive<usize>,
        usize: AsPrimitive<Probability> + AsPrimitive<F>,
    {
        let cdf = fast_quantized_cdf::<Probability, F, PRECISION>(probabilities, normalization)?;

        let mut extended_cdf = Vec::with_capacity(probabilities.len() + 1);
        extended_cdf.extend(cdf.zip(symbols));
        let last_symbol = extended_cdf.last().expect("`len` >= 2").1.clone();
        extended_cdf.push((wrapping_pow2(PRECISION), last_symbol));

        Ok(Self::from_extended_cdf(extended_cdf))
    }

    /// Slower variant of [`from_symbols_and_floating_point_probabilities_fast`].
    ///
    /// Similar to [`from_symbols_and_floating_point_probabilities_fast`], but the resulting
    /// (fixed-point precision) model typically approximates the provided floating point
    /// `probabilities` *very slightly* better. Only recommended if compression performance
    /// is *much* more important to you than runtime as this constructor can be
    /// significantly slower.
    ///
    /// See [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]
    /// for a detailed comparison between `..._fast` and `..._perfect` constructors of
    /// categorical entropy models.
    ///
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    /// Self::from_symbols_and_floating_point_probabilities_fast
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]:
    /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect
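    ///
    /// # Example
    ///
    /// A minimal sketch, analogous to the `..._fast` example above but using the
    /// `..._perfect` constructor (the alphabet and probabilities are again made up):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalDecoderModel;
    ///
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_perfect(
    ///         "abc".chars(),
    ///         &[0.1, 0.4, 0.5],
    ///     )
    ///     .unwrap();
    /// assert_eq!(model.support_size(), 3);
    /// ```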
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_perfect<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?;
        Self::from_symbols_and_nonzero_fixed_point_probabilities(
            symbols,
            slots.into_iter().map(|slot| slot.weight),
            false,
        )
    }

    /// Deprecated constructor.
    ///
    /// This constructor has been deprecated in constriction version 0.4.0, and it will be
    /// removed in constriction version 0.5.0.
    ///
    /// # Upgrade Instructions
    ///
    /// Most *new* use cases should call
    /// [`from_symbols_and_floating_point_probabilities_fast`] instead. Using that
    /// constructor (abbreviated as `..._fast` in the following) may lead to very slightly
    /// larger bit rates, but it runs considerably faster.
    ///
    /// However, note that the `..._fast` constructor breaks binary compatibility with
    /// `constriction` version <= 0.3.5. If you need to be able to exchange binary
    /// compressed data with a program that uses a categorical entropy model from
    /// `constriction` version <= 0.3.5, then call
    /// [`from_symbols_and_floating_point_probabilities_perfect`] instead (`..._perfect` for
    /// short). Another reason for using the `..._perfect` constructor could be if
    /// compression performance is *much* more important to you than runtime performance.
    /// See documentation of [`from_symbols_and_floating_point_probabilities_perfect`] for
    /// more information.
    ///
    /// # Compatibility Table
    ///
    /// (In the following table, "encoding" refers to
    /// [`NonContiguousCategoricalEncoderModel`])
    ///
    /// | constructor used for encoding → <br> ↓ constructor used for decoding ↓ | [legacy](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities) | [`..._perfect`](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_perfect) | [`..._fast`](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_fast) |
    /// | --------------------: | --------------- | --------------- | --------------- |
    /// | **legacy (this one)** | ✅ compatible | ✅ compatible | ❌ incompatible |
    /// | **[`..._perfect`]** | ✅ compatible | ✅ compatible | ❌ incompatible |
    /// | **[`..._fast`]** | ❌ incompatible | ❌ incompatible | ✅ compatible |
    ///
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    /// Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`..._perfect`]: Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    /// Self::from_symbols_and_floating_point_probabilities_fast
    /// [`..._fast`]: Self::from_symbols_and_floating_point_probabilities_fast
    #[deprecated(
        since = "0.4.0",
        note = "Please use `from_symbols_and_floating_point_probabilities_fast` or \
            `from_symbols_and_floating_point_probabilities_perfect` instead. See documentation for \
            detailed upgrade instructions."
    )]
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities<F>(
        symbols: &[Symbol],
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        Self::from_symbols_and_floating_point_probabilities_perfect(
            symbols.iter().cloned(),
            probabilities,
        )
    }

    /// Constructs a distribution with a PMF given in fixed point arithmetic.
    ///
    /// This is a low level method that allows, e.g., reconstructing a probability
    /// distribution previously exported with [`symbol_table`]. The more common way to
    /// construct a `NonContiguousCategoricalDecoderModel` is via
    /// [`from_symbols_and_floating_point_probabilities_fast`].
    ///
    /// The items of `probabilities` have to be nonzero and smaller than `1 << PRECISION`,
    /// where `PRECISION` is a const generic parameter on the
    /// `NonContiguousCategoricalDecoderModel`.
    ///
    /// If `infer_last_probability` is `false` then `probabilities` must yield the same
    /// number of items as `symbols` does, and the items yielded by `probabilities` have
    /// to (logically) sum up to `1 << PRECISION`. If `infer_last_probability` is `true`
    /// then `probabilities` must yield one fewer item than `symbols`, they must sum up to a
    /// value strictly smaller than `1 << PRECISION`, and the method will assign the
    /// (nonzero) remaining probability to the last symbol.
    ///
    /// # Example
    ///
    /// Creating a `NonContiguousCategoricalDecoderModel` with inferred probability of the
    /// last symbol:
    ///
    /// ```
    /// use constriction::stream::model::{
    ///     DefaultNonContiguousCategoricalDecoderModel, IterableEntropyModel
    /// };
    ///
    /// let partial_probabilities = vec![1u32 << 21, 1 << 22, 1 << 22, 1 << 22];
    /// // `partial_probabilities` sums up to strictly less than `1 << PRECISION` as required:
    /// assert!(partial_probabilities.iter().sum::<u32>() < 1 << 24);
    ///
    /// let symbols = "abcde"; // Has one more entry than `probabilities`
    ///
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_nonzero_fixed_point_probabilities(
    ///         symbols.chars(), &partial_probabilities, true).unwrap();
    /// let symbol_table = model.floating_point_symbol_table::<f64>().collect::<Vec<_>>();
    /// assert_eq!(
    ///     symbol_table,
    ///     vec![
    ///         ('a', 0.0, 0.125),
    ///         ('b', 0.125, 0.25),
    ///         ('c', 0.375, 0.25),
    ///         ('d', 0.625, 0.25),
    ///         ('e', 0.875, 0.125), // Inferred last probability.
    ///     ]
    /// );
    /// ```
    ///
    /// For more related examples, see
    /// [`ContiguousCategoricalEntropyModel::from_nonzero_fixed_point_probabilities`].
    ///
    /// [`symbol_table`]: IterableEntropyModel::symbol_table
    /// [`fixed_point_probabilities`]: Self::fixed_point_probabilities
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    /// Self::from_symbols_and_floating_point_probabilities_fast
    /// [`ContiguousCategoricalEntropyModel::from_nonzero_fixed_point_probabilities`]:
    /// crate::stream::model::ContiguousCategoricalEntropyModel::from_nonzero_fixed_point_probabilities
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_nonzero_fixed_point_probabilities<S, P>(
        symbols: S,
        probabilities: P,
        infer_last_probability: bool,
    ) -> Result<Self, ()>
    where
        S: IntoIterator<Item = Symbol>,
        P: IntoIterator,
        P::Item: Borrow<Probability>,
    {
        let symbols = symbols.into_iter();
        let mut cdf = Vec::with_capacity(symbols.size_hint().0 + 1);
        let mut symbols = accumulate_nonzero_probabilities::<_, _, _, _, _, PRECISION>(
            symbols,
            probabilities.into_iter(),
            |symbol, left_sided_cumulative, _| {
                cdf.push((left_sided_cumulative, symbol));
                Ok(())
            },
            infer_last_probability,
        )?;
        cdf.push((
            wrapping_pow2(PRECISION),
            cdf.last().expect("`symbols` is not empty").1.clone(),
        ));

        if symbols.next().is_some() {
            Err(())
        } else {
            Ok(Self::from_extended_cdf(cdf))
        }
    }

    #[inline(always)]
    fn from_extended_cdf(cdf: Vec<(Probability, Symbol)>) -> Self {
        Self {
            cdf,
            phantom: PhantomData,
        }
    }

    /// Creates a `NonContiguousCategoricalDecoderModel` from any entropy model that
    /// implements [`IterableEntropyModel`].
    ///
    /// Calling `NonContiguousCategoricalDecoderModel::from_iterable_entropy_model(&model)`
    /// is equivalent to calling `model.to_generic_decoder_model()`, where the latter
    /// requires bringing [`IterableEntropyModel`] into scope.
    pub fn from_iterable_entropy_model<'m, M>(model: &'m M) -> Self
    where
        M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
    {
        let symbol_table = model.symbol_table();
        let mut cdf = Vec::with_capacity(symbol_table.size_hint().0 + 1);
        cdf.extend(
            symbol_table.map(|(symbol, left_sided_cumulative, _)| (left_sided_cumulative, symbol)),
        );
        cdf.push((
            wrapping_pow2(PRECISION),
            cdf.last().expect("`symbol_table` is not empty").1.clone(),
        ));

        Self {
            cdf,
            phantom: PhantomData,
        }
    }
}

impl<Symbol, Probability, Cdf, const PRECISION: usize>
    NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Symbol: Clone,
    Probability: BitArray,
    Cdf: AsRef<[(Probability, Symbol)]>,
{
    /// Returns the number of symbols supported by the model, i.e., the number of symbols to
    /// which the model assigns a nonzero probability.
    #[inline(always)]
    pub fn support_size(&self) -> usize {
        self.cdf.as_ref().len() - 1
    }

    /// Makes a very cheap shallow copy of the model that can be used much like a shared
    /// reference.
    ///
    /// The returned `NonContiguousCategoricalDecoderModel` implements `Copy`, which is a
    /// requirement for some methods, such as [`Decode::decode_iid_symbols`]. These methods
    /// could also accept a shared reference to a `NonContiguousCategoricalDecoderModel`
    /// (since all references to entropy models are also entropy models, and all shared
    /// references implement `Copy`), but passing a *view* instead may be slightly more
    /// efficient because it avoids one level of dereferencing.
    ///
    /// [`Decode::decode_iid_symbols`]: crate::stream::Decode::decode_iid_symbols
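    ///
    /// # Example
    ///
    /// A minimal sketch of the intended usage pattern (the model itself is made up):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalDecoderModel;
    ///
    /// let model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         "abc".chars(), &[0.2, 0.3, 0.5], None)
    ///     .unwrap();
    /// let view = model.as_view();
    /// let copy_of_view = view; // Unlike `model` itself, `view` implements `Copy`.
    /// assert_eq!(copy_of_view.support_size(), model.support_size());
    /// ```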
    #[inline]
    pub fn as_view(
        &self,
    ) -> NonContiguousCategoricalDecoderModel<
        Symbol,
        Probability,
        &[(Probability, Symbol)],
        PRECISION,
    > {
        NonContiguousCategoricalDecoderModel {
            cdf: self.cdf.as_ref(),
            phantom: PhantomData,
        }
    }

    /// Creates a [`NonContiguousLookupDecoderModel`] for efficient decoding of i.i.d. data.
    ///
    /// While a `NonContiguousCategoricalDecoderModel` can already be used for decoding
    /// (since it implements [`DecoderModel`]), you may prefer converting it to a
    /// `NonContiguousLookupDecoderModel` first for improved efficiency. Logically, the two
    /// will be equivalent.
    ///
    /// # Warning
    ///
    /// You should only call this method if both of the following conditions are satisfied:
    ///
    /// - `PRECISION` is relatively small (typically `PRECISION == 12`, as in the "Small"
    ///   [preset]) because the memory footprint of a lookup decoder model grows
    ///   exponentially in `PRECISION`; and
    /// - you're about to decode a relatively large number of symbols with the resulting
    ///   model; the conversion to a `NonContiguousLookupDecoderModel` bears a significant
    ///   runtime and memory overhead, so if you're going to use the resulting model only
    ///   for a single or a handful of symbols then you'll end up paying more than you gain.
    ///
    /// [preset]: crate::stream#presets
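    ///
    /// # Example
    ///
    /// A minimal sketch using the "Small" preset (`u16` probabilities, `PRECISION == 12`;
    /// lookup models require `Probability: Into<usize>`):
    ///
    /// ```
    /// use constriction::stream::model::SmallNonContiguousCategoricalDecoderModel;
    ///
    /// let model = SmallNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         "abc".chars(), &[0.2f32, 0.3, 0.5], None)
    ///     .unwrap();
    /// let lookup_decoder_model = model.to_lookup_decoder_model();
    /// // `lookup_decoder_model` decodes the same symbols as `model`, just faster.
    /// ```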
    #[inline(always)]
    pub fn to_lookup_decoder_model(
        &self,
    ) -> NonContiguousLookupDecoderModel<
        Symbol,
        Probability,
        Vec<(Probability, Symbol)>,
        Box<[Probability]>,
        PRECISION,
    >
    where
        Probability: Into<usize>,
        usize: AsPrimitive<Probability>,
    {
        self.into()
    }
}

impl<Symbol, Probability, Cdf, const PRECISION: usize> EntropyModel<PRECISION>
    for NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Probability: BitArray,
{
    type Symbol = Symbol;
    type Probability = Probability;
}

impl<'m, Symbol, Probability, Cdf, const PRECISION: usize> IterableEntropyModel<'m, PRECISION>
    for NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Symbol: Clone + 'm,
    Probability: BitArray,
    Cdf: AsRef<[(Probability, Symbol)]>,
{
    #[inline(always)]
    fn symbol_table(
        &'m self,
    ) -> impl Iterator<
        Item = (
            Self::Symbol,
            Self::Probability,
            <Self::Probability as BitArray>::NonZero,
        ),
    > {
        iter_extended_cdf(self.cdf.as_ref().iter().cloned())
    }

    fn floating_point_symbol_table<F>(&'m self) -> impl Iterator<Item = (Self::Symbol, F, F)>
    where
        F: FloatCore + From<Self::Probability> + 'm,
        Self::Probability: Into<F>,
    {
        // This gets compiled into a constant, and the divisions by `whole` get compiled
        // into floating point multiplications rather than (slower) divisions.
        let whole = (F::one() + F::one()) * (Self::Probability::one() << (PRECISION - 1)).into();

        self.symbol_table()
            .map(move |(symbol, cumulative, probability)| {
                (
                    symbol,
                    cumulative.into() / whole,
                    probability.get().into() / whole,
                )
            })
    }

    fn entropy_base2<F>(&'m self) -> F
    where
        F: num_traits::Float + core::iter::Sum,
        Self::Probability: Into<F>,
    {
        let entropy_scaled = self
            .symbol_table()
            .map(|(_, _, probability)| {
                let probability = probability.get().into();
                probability * probability.log2() // probability is guaranteed to be nonzero.
            })
            .sum::<F>();

        let whole = (F::one() + F::one()) * (Self::Probability::one() << (PRECISION - 1)).into();
        F::from(PRECISION).unwrap() - entropy_scaled / whole
    }

    fn to_generic_encoder_model(
        &'m self,
    ) -> NonContiguousCategoricalEncoderModel<Self::Symbol, Self::Probability, PRECISION>
    where
        Self::Symbol: core::hash::Hash + Eq,
    {
        self.into()
    }

    fn to_generic_decoder_model(
        &'m self,
    ) -> NonContiguousCategoricalDecoderModel<
        Self::Symbol,
        Self::Probability,
        Vec<(Self::Probability, Self::Symbol)>,
        PRECISION,
    >
    where
        Self::Symbol: Clone,
    {
        self.into()
    }

    fn to_generic_lookup_decoder_model(
        &'m self,
    ) -> NonContiguousLookupDecoderModel<
        Self::Symbol,
        Self::Probability,
        Vec<(Self::Probability, Self::Symbol)>,
        Box<[Self::Probability]>,
        PRECISION,
    >
    where
        Self::Probability: Into<usize>,
        usize: AsPrimitive<Self::Probability>,
        Self::Symbol: Clone + Default,
    {
        self.into()
    }
}

impl<Symbol, Probability, Cdf, const PRECISION: usize> DecoderModel<PRECISION>
    for NonContiguousCategoricalDecoderModel<Symbol, Probability, Cdf, PRECISION>
where
    Symbol: Clone,
    Probability: BitArray,
    Cdf: AsRef<[(Probability, Symbol)]>,
{
    #[inline(always)]
    fn quantile_function(
        &self,
        quantile: Self::Probability,
    ) -> (Symbol, Probability, Probability::NonZero) {
        let cdf = self.cdf.as_ref();
        // SAFETY: `cdf` is not empty.
        let monotonic_part_of_cdf = unsafe { cdf.get_unchecked(..cdf.len() - 1) };
        let Err(next_index) = monotonic_part_of_cdf.binary_search_by(|(cumulative, _symbol)| {
            if *cumulative <= quantile {
                core::cmp::Ordering::Less
            } else {
                core::cmp::Ordering::Greater
            }
        }) else {
            // SAFETY: our search criterion never returns `Equal`, so the search cannot succeed.
            unsafe { core::hint::unreachable_unchecked() }
        };

        // SAFETY:
        // - `next_index < cdf.len()` because we searched only within `monotonic_part_of_cdf`,
        //   which is one element shorter than `cdf`. Thus `cdf.get_unchecked(next_index)` is
        //   sound.
        // - `next_index > 0` because `cdf[0] == 0` and our search goes right on equality; thus,
        //   `next_index - 1` does not wrap around, and so `next_index - 1` is also within bounds.
        let (right_cumulative, (left_cumulative, symbol)) = unsafe {
            (
                cdf.get_unchecked(next_index).0,
                cdf.get_unchecked(next_index - 1).clone(),
            )
        };

        // SAFETY: our constructors don't allow zero probabilities.
        let probability = unsafe {
            right_cumulative
                .wrapping_sub(&left_cumulative)
                .into_nonzero_unchecked()
        };

        (symbol, left_cumulative, probability)
    }
}

impl<'m, Symbol, Probability, M, const PRECISION: usize> From<&'m M>
    for NonContiguousCategoricalDecoderModel<
        Symbol,
        Probability,
        Vec<(Probability, Symbol)>,
        PRECISION,
    >
where
    Symbol: Clone,
    Probability: BitArray,
    M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
{
    #[inline(always)]
    fn from(model: &'m M) -> Self {
        Self::from_iterable_entropy_model(model)
    }
}

/// An entropy model for a categorical probability distribution over arbitrary symbols, for
/// encoding only.
///
/// You will usually want to use this type through one of its type aliases,
/// [`DefaultNonContiguousCategoricalEncoderModel`] or
/// [`SmallNonContiguousCategoricalEncoderModel`], see [discussion of
/// presets](crate::stream#presets).
///
/// This type implements the trait [`EncoderModel`] but not the trait [`DecoderModel`].
/// Thus, you can use a `NonContiguousCategoricalEncoderModel` for *encoding* with any of
/// the stream encoders provided by the `constriction` crate, but not for decoding. If you
/// want to decode data, use a [`NonContiguousCategoricalDecoderModel`] instead.
///
/// # Example
///
/// ```
/// use constriction::{
///     stream::{stack::DefaultAnsCoder, Decode},
///     stream::model::DefaultNonContiguousCategoricalEncoderModel,
///     stream::model::DefaultNonContiguousCategoricalDecoderModel,
///     UnwrapInfallible,
/// };
///
/// // Create a `NonContiguousCategoricalEncoderModel` that approximates floating point probabilities.
/// let alphabet = ['M', 'i', 's', 'p', '!'];
/// let probabilities = [0.09, 0.36, 0.36, 0.18, 0.0];
/// let encoder_model = DefaultNonContiguousCategoricalEncoderModel
///     ::from_symbols_and_floating_point_probabilities_fast(
///         alphabet.iter().cloned(),
///         &probabilities,
///         None
///     )
///     .unwrap();
/// assert_eq!(encoder_model.support_size(), 5); // `encoder_model` supports 5 symbols.
///
/// // Use `encoder_model` for entropy coding.
/// let message = "Mississippi!";
/// let mut ans_coder = DefaultAnsCoder::new();
/// ans_coder.encode_iid_symbols_reverse(message.chars(), &encoder_model).unwrap();
/// // Note that `message` contains the symbol '!', which has zero probability under our
/// // floating-point model. However, we can still encode the symbol because the
/// // `NonContiguousCategoricalEncoderModel` is "leaky", i.e., it assigns a nonzero
/// // probability to all symbols that we provided to the constructor.
///
/// // Create a matching `decoder_model`, decode the encoded message, and verify correctness.
/// let decoder_model = DefaultNonContiguousCategoricalDecoderModel
///     ::from_symbols_and_floating_point_probabilities_fast(
///         &alphabet, &probabilities, None
///     )
///     .unwrap();
///
/// // We could pass `decoder_model` by reference (like we did for `encoder_model` above) but
/// // passing `decoder_model.as_view()` is slightly more efficient.
/// let decoded = ans_coder
///     .decode_iid_symbols(12, decoder_model.as_view())
///     .collect::<Result<String, _>>()
///     .unwrap_infallible();
/// assert_eq!(decoded, message);
/// assert!(ans_coder.is_empty());
///
/// // The `encoder_model` assigns zero probability to any symbols that were not provided to its
/// // constructor, so trying to encode a message that contains such a symbol will fail.
/// assert!(ans_coder.encode_iid_symbols_reverse("Mix".chars(), &encoder_model).is_err())
/// // ERROR: symbol 'x' is not in the support of `encoder_model`.
/// ```
///
/// # When Should I Use This Type of Entropy Model?
///
/// Use a `NonContiguousCategoricalEncoderModel` for probabilistic models that can *only* be
/// represented as an explicit probability table, and not by some more compact analytic
/// expression.
///
/// - If you have a probability model that can be expressed by some analytical expression
///   (e.g., a [`Binomial`](probability::distribution::Binomial) distribution), then use
///   [`LeakyQuantizer`] instead (unless you want to encode lots of symbols with the same
///   entropy model, in which case the explicitly tabulated representation of a categorical
///   entropy model could improve runtime performance).
/// - If the *support* of your probabilistic model (i.e., the set of symbols to which the
///   model assigns a non-zero probability) is a contiguous range of integers starting at
///   zero, then it is better to use a [`ContiguousCategoricalEntropyModel`]. It has better
///   computational efficiency and it is easier to use since it supports both encoding and
///   decoding with a single type.
/// - If you want to encode only a few symbols with a given probability model, then use a
///   [`LazyContiguousCategoricalEntropyModel`], which will be faster (use a `HashMap` to
///   first map from your noncontiguous support to indices in a contiguous range `0..N`,
///   where `N` is the size of your support). This use case occurs, e.g., in autoregressive
///   models, where each individual model is often used for only exactly one symbol.
///
/// # Computational Efficiency
///
/// For a probability distribution with a support of `N` symbols, a
/// `NonContiguousCategoricalEncoderModel` has the following asymptotic costs:
///
/// - creation:
///   - runtime cost: `Θ(N log(N))` (when creating with the [`..._fast` constructor]);
///   - memory footprint: `Θ(N)`;
/// - encoding a symbol (calling [`EncoderModel::left_cumulative_and_probability`]):
///   - expected runtime cost: `Θ(1)` (the worst case can be more expensive because this
///     model uses a `HashMap` under the hood);
///   - memory footprint: no heap allocations, constant stack space.
/// - decoding a symbol: not supported; use a [`NonContiguousCategoricalDecoderModel`].
///
/// [`EntropyModel`]: trait.EntropyModel.html
/// [`Encode`]: crate::Encode
/// [`Decode`]: crate::Decode
/// [`HashMap`]: std::collections::HashMap
/// [`ContiguousCategoricalEntropyModel`]: crate::stream::model::ContiguousCategoricalEntropyModel
/// [`LeakyQuantizer`]: crate::stream::model::LeakyQuantizer
/// [`..._fast` constructor]: Self::from_symbols_and_floating_point_probabilities_fast
/// [`LazyContiguousCategoricalEntropyModel`]:
/// crate::stream::model::LazyContiguousCategoricalEntropyModel
#[derive(Debug, Clone)]
pub struct NonContiguousCategoricalEncoderModel<Symbol, Probability, const PRECISION: usize>
where
    Symbol: Hash,
    Probability: BitArray,
{
    table: HashMap<Symbol, (Probability, Probability::NonZero)>,
}

impl<Symbol, Probability, const PRECISION: usize>
    NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash + Eq,
    Probability: BitArray,
{
    /// Constructs a leaky distribution (for encoding) over the provided `symbols` whose PMF
    /// approximates given `probabilities`.
    ///
    /// Semantics are analogous to
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`],
    /// except that this constructor has an additional `symbols` argument to provide an
    /// iterator over the symbols in the alphabet (which has to yield exactly
    /// `probabilities.len()` symbols).
    ///
    /// # See also
    ///
    /// - [`from_symbols_and_floating_point_probabilities_perfect`], which can be
    ///   considerably slower but typically approximates the provided `probabilities` *very
    ///   slightly* better.
    ///
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`]:
    /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    /// Self::from_symbols_and_floating_point_probabilities_perfect
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_fast<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
        normalization: Option<F>,
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + AsPrimitive<Probability>,
        Probability: AsPrimitive<usize>,
        usize: AsPrimitive<Probability> + AsPrimitive<F>,
    {
        let cdf = fast_quantized_cdf::<Probability, F, PRECISION>(probabilities, normalization)?;
        Self::from_symbols_and_cdf(symbols, cdf)
    }

    /// Slower variant of [`from_symbols_and_floating_point_probabilities_fast`].
    ///
    /// Similar to [`from_symbols_and_floating_point_probabilities_fast`], but the resulting
    /// (fixed-point precision) model typically approximates the provided floating point
    /// `probabilities` *very slightly* better. Only recommended if compression performance
    /// is *much* more important to you than runtime as this constructor can be
    /// significantly slower.
    ///
    /// See [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]
    /// for a detailed comparison between `..._fast` and `..._perfect` constructors of
    /// categorical entropy models.
    ///
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    /// Self::from_symbols_and_floating_point_probabilities_fast
    /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]:
    /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities_perfect<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?;
        Self::from_symbols_and_nonzero_fixed_point_probabilities(
            symbols,
            slots.into_iter().map(|slot| slot.weight),
            false,
        )
    }

    /// Deprecated constructor.
    ///
    /// This constructor has been deprecated in constriction version 0.4.0, and it will be
    /// removed in constriction version 0.5.0.
    ///
    /// # Upgrade Instructions
    ///
    /// Most *new* use cases should call
    /// [`from_symbols_and_floating_point_probabilities_fast`] instead. Using that
    /// constructor (abbreviated as `..._fast` in the following) may lead to very slightly
    /// larger bit rates, but it runs considerably faster.
    ///
    /// However, note that the `..._fast` constructor breaks binary compatibility with
    /// `constriction` version <= 0.3.5. If you need to be able to exchange binary
    /// compressed data with a program that uses a categorical entropy model from
    /// `constriction` version <= 0.3.5, then call
    /// [`from_symbols_and_floating_point_probabilities_perfect`] instead (`..._perfect` for
    /// short). Another reason for using the `..._perfect` constructor could be if
    /// compression performance is *much* more important to you than runtime performance.
    /// See documentation of [`from_symbols_and_floating_point_probabilities_perfect`] for
    /// more information.
    ///
    /// # Compatibility Table
    /// (In the following table, "decoding" refers to
    /// [`NonContiguousCategoricalDecoderModel`])
    ///
    /// | constructor used for encoding → <br> ↓ constructor used for decoding ↓ | legacy <br> (this one) | [`..._perfect`] | [`..._fast`] |
    /// | --------------------: | --------------- | --------------- | --------------- |
    /// | **[legacy](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities)** | ✅ compatible | ✅ compatible | ❌ incompatible |
    /// | **[`..._perfect`](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities_perfect)** | ✅ compatible | ✅ compatible | ❌ incompatible |
    /// | **[`..._fast`](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities_fast)** | ❌ incompatible | ❌ incompatible | ✅ compatible |
    ///
    /// [`from_symbols_and_floating_point_probabilities_perfect`]:
    /// Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`..._perfect`]: Self::from_symbols_and_floating_point_probabilities_perfect
    /// [`from_symbols_and_floating_point_probabilities_fast`]:
    /// Self::from_symbols_and_floating_point_probabilities_fast
    /// [`..._fast`]: Self::from_symbols_and_floating_point_probabilities_fast
    #[deprecated(
        since = "0.4.0",
        note = "Please use `from_symbols_and_floating_point_probabilities_fast` or \
            `from_symbols_and_floating_point_probabilities_perfect` instead. See documentation for \
            detailed upgrade instructions."
    )]
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_floating_point_probabilities<F>(
        symbols: impl IntoIterator<Item = Symbol>,
        probabilities: &[F],
    ) -> Result<Self, ()>
    where
        F: FloatCore + core::iter::Sum<F> + Into<f64>,
        Probability: Into<f64> + AsPrimitive<usize>,
        f64: AsPrimitive<Probability>,
        usize: AsPrimitive<Probability>,
    {
        Self::from_symbols_and_floating_point_probabilities_perfect(symbols, probabilities)
    }

    /// Constructs a distribution with a PMF given in fixed point arithmetic.
    ///
    /// This method operates logically identically to
    /// [`NonContiguousCategoricalDecoderModel::from_symbols_and_nonzero_fixed_point_probabilities`]
    /// except that it constructs an [`EncoderModel`] rather than a [`DecoderModel`].
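    ///
    /// # Example
    ///
    /// A minimal sketch with made-up fixed-point weights (using the "Default" preset,
    /// i.e., `PRECISION == 24`, so the weights must sum to `1 << 24`):
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalEncoderModel;
    ///
    /// // 2^22 + 2^22 + 2^23 == 2^24, as required when `infer_last_probability == false`.
    /// let probabilities = [1u32 << 22, 1 << 22, 1 << 23];
    /// let model = DefaultNonContiguousCategoricalEncoderModel
    ///     ::from_symbols_and_nonzero_fixed_point_probabilities(
    ///         "abc".chars(), &probabilities, false)
    ///     .unwrap();
    /// assert_eq!(model.support_size(), 3);
    /// ```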
    #[allow(clippy::result_unit_err)]
    pub fn from_symbols_and_nonzero_fixed_point_probabilities<S, P>(
        symbols: S,
        probabilities: P,
        infer_last_probability: bool,
    ) -> Result<Self, ()>
    where
        S: IntoIterator<Item = Symbol>,
        P: IntoIterator,
        P::Item: Borrow<Probability>,
    {
        let symbols = symbols.into_iter();
        let mut table =
            HashMap::with_capacity(symbols.size_hint().0 + infer_last_probability as usize);
        let mut symbols = accumulate_nonzero_probabilities::<_, _, _, _, _, PRECISION>(
            symbols,
            probabilities.into_iter(),
            |symbol, left_sided_cumulative, probability| match table.entry(symbol) {
                Occupied(_) => Err(()),
                Vacant(slot) => {
                    let probability = probability.into_nonzero().ok_or(())?;
                    slot.insert((left_sided_cumulative, probability));
                    Ok(())
                }
            },
            infer_last_probability,
        )?;

        if symbols.next().is_some() {
            Err(())
        } else {
            Ok(Self { table })
        }
    }

    #[allow(clippy::result_unit_err)]
    fn from_symbols_and_cdf<S, P>(symbols: S, cdf: P) -> Result<Self, ()>
    where
        S: IntoIterator<Item = Symbol>,
        P: IntoIterator<Item = Probability>,
    {
        let mut symbols = symbols.into_iter();
        let mut cdf = cdf.into_iter();
        let mut table = HashMap::with_capacity(symbols.size_hint().0);

        let mut left_cumulative = cdf.next().ok_or(())?;
        for right_cumulative in cdf {
            let symbol = symbols.next().ok_or(())?;
            match table.entry(symbol) {
                Occupied(_) => return Err(()),
                Vacant(slot) => {
                    let probability = (right_cumulative - left_cumulative)
                        .into_nonzero()
                        .ok_or(())?;
                    slot.insert((left_cumulative, probability));
                }
            }
            left_cumulative = right_cumulative;
        }

        let last_symbol = symbols.next().ok_or(())?;
        let right_cumulative = wrapping_pow2::<Probability>(PRECISION);
        match table.entry(last_symbol) {
            Occupied(_) => return Err(()),
            Vacant(slot) => {
                let probability = right_cumulative
                    .wrapping_sub(&left_cumulative)
                    .into_nonzero()
                    .ok_or(())?;
                slot.insert((left_cumulative, probability));
            }
        }

        if symbols.next().is_some() {
            Err(())
        } else {
            Ok(Self { table })
        }
    }

    /// Creates a `NonContiguousCategoricalEncoderModel` from any entropy model that
    /// implements [`IterableEntropyModel`].
    ///
    /// Calling `NonContiguousCategoricalEncoderModel::from_iterable_entropy_model(&model)`
    /// is equivalent to calling `model.to_generic_encoder_model()`, where the latter
    /// requires bringing [`IterableEntropyModel`] into scope.
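    ///
    /// # Example
    ///
    /// A minimal sketch that converts a (made-up) decoder model into an encoder model:
    ///
    /// ```
    /// use constriction::stream::model::{
    ///     DefaultNonContiguousCategoricalDecoderModel,
    ///     DefaultNonContiguousCategoricalEncoderModel,
    /// };
    ///
    /// let decoder_model = DefaultNonContiguousCategoricalDecoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         "abc".chars(), &[0.2, 0.3, 0.5], None)
    ///     .unwrap();
    /// let encoder_model = DefaultNonContiguousCategoricalEncoderModel
    ///     ::from_iterable_entropy_model(&decoder_model);
    /// assert_eq!(encoder_model.support_size(), 3);
    /// ```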
    pub fn from_iterable_entropy_model<'m, M>(model: &'m M) -> Self
    where
        M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
    {
        let table = model
            .symbol_table()
            .map(|(symbol, left_sided_cumulative, probability)| {
                (symbol, (left_sided_cumulative, probability))
            })
            .collect::<HashMap<_, _>>();
        Self { table }
    }

    /// Returns the number of symbols in the support of the model.
    ///
    /// The support of the model is the set of all symbols that have nonzero probability.
    pub fn support_size(&self) -> usize {
        self.table.len()
    }

    /// Returns the entropy in units of bits (i.e., base 2).
    ///
    /// Similar to [`IterableEntropyModel::entropy_base2`], except that
    /// - this type doesn't implement `IterableEntropyModel` because it doesn't store
    ///   entries in a stable expected order;
    /// - because the order in which entries are stored will generally be different on each
    ///   program execution, rounding errors will be slightly different across multiple
    ///   program executions.
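    ///
    /// # Example
    ///
    /// A minimal sketch: a uniform model over two (made-up) symbols has an entropy of
    /// exactly 1 bit:
    ///
    /// ```
    /// use constriction::stream::model::DefaultNonContiguousCategoricalEncoderModel;
    ///
    /// let model = DefaultNonContiguousCategoricalEncoderModel
    ///     ::from_symbols_and_floating_point_probabilities_fast(
    ///         "ab".chars(), &[0.5, 0.5], None)
    ///     .unwrap();
    /// let entropy: f64 = model.entropy_base2();
    /// assert!((entropy - 1.0).abs() < 1e-6);
    /// ```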
    pub fn entropy_base2<F>(&self) -> F
    where
        F: num_traits::Float + core::iter::Sum,
        Probability: Into<F>,
    {
        let entropy_scaled = self
            .table
            .values()
            .map(|&(_, probability)| {
                let probability = probability.get().into();
                probability * probability.log2() // probability is guaranteed to be nonzero.
            })
            .sum::<F>();

        let whole = (F::one() + F::one()) * (Probability::one() << (PRECISION - 1)).into();
        F::from(PRECISION).unwrap() - entropy_scaled / whole
    }
}

impl<'m, Symbol, Probability, M, const PRECISION: usize> From<&'m M>
    for NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash + Eq,
    Probability: BitArray,
    M: IterableEntropyModel<'m, PRECISION, Symbol = Symbol, Probability = Probability> + ?Sized,
{
    #[inline(always)]
    fn from(model: &'m M) -> Self {
        Self::from_iterable_entropy_model(model)
    }
}

impl<Symbol, Probability: BitArray, const PRECISION: usize> EntropyModel<PRECISION>
    for NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash,
    Probability: BitArray,
{
    type Probability = Probability;
    type Symbol = Symbol;
}

impl<Symbol, Probability: BitArray, const PRECISION: usize> EncoderModel<PRECISION>
    for NonContiguousCategoricalEncoderModel<Symbol, Probability, PRECISION>
where
    Symbol: Hash + Eq,
    Probability: BitArray,
{
    #[inline(always)]
    fn left_cumulative_and_probability(
        &self,
        symbol: impl Borrow<Self::Symbol>,
    ) -> Option<(Self::Probability, Probability::NonZero)> {
        self.table.get(symbol.borrow()).cloned()
    }
}

#[cfg(test)]
mod tests {
    use super::super::super::tests::{test_iterable_entropy_model, verify_iterable_entropy_model};

    use super::*;

    #[test]
    fn non_contiguous_categorical() {
        let hist = [
            1u32, 186545, 237403, 295700, 361445, 433686, 509456, 586943, 663946, 737772, 1657269,
            896675, 922197, 930672, 916665, 0, 0, 0, 0, 0, 723031, 650522, 572300, 494702, 418703,
            347600, 1, 283500, 226158, 178194, 136301, 103158, 76823, 55540, 39258, 27988, 54269,
        ];
        let probabilities = hist.iter().map(|&x| x as f64).collect::<Vec<_>>();
        let symbols = "QWERTYUIOPASDFGHJKLZXCVBNM 1234567890"
            .chars()
            .collect::<Vec<_>>();

        let fast =
            NonContiguousCategoricalDecoderModel::<_, u32, _, 32>::from_symbols_and_floating_point_probabilities_fast(
                symbols.iter().cloned(),
                &probabilities,
                None,
            )
            .unwrap();
        test_iterable_entropy_model(&fast, symbols.iter().cloned());
        let kl_fast = verify_iterable_entropy_model(&fast, &hist, 1e-8);

        let perfect =
            NonContiguousCategoricalDecoderModel::<_, u32, _, 32>::from_symbols_and_floating_point_probabilities_perfect(
                symbols.iter().cloned(),
                &probabilities,
            )
            .unwrap();
        test_iterable_entropy_model(&perfect, symbols.iter().cloned());
        let kl_perfect = verify_iterable_entropy_model(&perfect, &hist, 1e-8);

        assert!(kl_perfect < kl_fast);
    }
}
1146}