oxideav_opus/range_decoder.rs
1//! Range decoder primitives for the Opus codec.
2//!
3//! This module implements the bit-exact range decoder described in
4//! RFC 6716 §4.1 (`docs/audio/opus/rfc6716-opus.txt`). The implementation
5//! is clean-room: every routine is transcribed from the prose and
6//! pseudocode equations in the RFC; no external library source was
7//! consulted.
8//!
9//! The range decoder is the SHARED entropy primitive that both the SILK
10//! and CELT layers of Opus invoke for every coded symbol. The
11//! [`oxideav-celt`] crate carries its own copy of the same primitive;
12//! each crate owns its copy until a shared low-level primitive crate
13//! exists in the workspace. The two copies are independent clean-room
14//! transcriptions of the same RFC sections and are expected to be
15//! behaviourally identical.
16//!
17//! The following routines are wired up:
18//!
19//! * Initialization (§4.1.1).
20//! * Symbol-update internal helper (§4.1.2).
21//! * Renormalization (§4.1.2.1).
22//! * [`RangeDecoder::decode_bin`] for power-of-two `ft` symbols (§4.1.3.1).
23//! * [`RangeDecoder::dec_bit_logp`] (§4.1.3.2).
24//! * [`RangeDecoder::dec_icdf`] for inverse-CDF table decoding (§4.1.3.3).
25//! * [`RangeDecoder::dec_bits`] for raw bits (§4.1.4).
26//! * [`RangeDecoder::dec_uint`] for uniformly-distributed integers
27//! (§4.1.5).
28//! * [`RangeDecoder::tell`] for whole-bit accounting (§4.1.6.1).
29//! * [`RangeDecoder::tell_frac`] for 1/8th-bit-precision accounting
30//! (§4.1.6.2).
31//! * [`RangeDecoder::ec_decode`] / [`RangeDecoder::ec_dec_update`] for
32//! the generic two-step symbol path (§4.1.2). These are the building
33//! blocks for custom symbol decoders that an inverse-CDF table cannot
34//! express directly — notably the CELT §4.3.2.1 coarse-energy
35//! Laplace decoder and the §4.3.3 allocation interpolation search,
36//! both of which decode against a frequency model computed at
37//! run time rather than a fixed `icdf[]` table.
38
39use crate::Error;
40
41/// Bit-exact CELT/SILK range decoder state per RFC 6716 §4.1.
42///
43/// The decoder splits the input buffer into two halves. The range
44/// coder consumes bytes from the front (MSB-first into the range
45/// state) and the raw-bit reader consumes bytes from the back
46/// (LSB-first). RFC 6716 §4.1.4 explicitly permits the two readers
47/// to overlap; the decoder MUST allow it.
48#[derive(Debug)]
49pub struct RangeDecoder<'a> {
50 /// Input bitstream backing this decoder.
51 buf: &'a [u8],
52 /// Offset of the next byte the range coder will consume (advances
53 /// forward through `buf`).
54 fwd: usize,
55 /// Number of bytes consumed by the raw-bit reader, measured from
56 /// the END of `buf`. A value of `0` means no raw bit has yet been
57 /// read; the next raw byte fetched comes from `buf[buf.len() - 1]`.
58 back: usize,
59 /// Number of unconsumed bits currently sitting in `back_window`
60 /// (0..=8 at rest, may exceed during refill).
61 back_bits_avail: u32,
62 /// Buffer of unconsumed raw bits, packed with the next bit to
63 /// emit in bit 0.
64 back_window: u32,
65 /// One-bit buffer holding the LSB of the previously-consumed
66 /// forward byte (used in the next renormalization step, §4.1.2.1).
67 rem: u32,
68 /// Range size; the renormalization invariant is `rng > 2**23`.
69 rng: u32,
70 /// Top of range minus current code value, minus one.
71 val: u32,
72 /// Running tally of whole bits the range coder has consumed
73 /// (RFC 6716 §4.1.6 `nbits_total`).
74 nbits_total: u32,
75 /// Number of raw bits the decoder has read so far. RFC 6716 §4.1.6
76 /// adds these into the bit-usage accounting on top of `nbits_total`.
77 nbits_raw: u32,
78 /// Sticky error flag: any decode that detects a corrupt frame
79 /// latches an error. Once set, subsequent decodes return zeroes
80 /// rather than corrupting the caller's state. RFC 6716 §4.1.5
81 /// recommends this behaviour for malformed integer decodes.
82 error: bool,
83}
84
85impl<'a> RangeDecoder<'a> {
86 /// Renormalization invariant from §4.1.2.1: `rng > 2**23`.
87 const RNG_MIN: u32 = 1 << 23;
88
89 /// Initialize the range decoder over `buf` per RFC 6716 §4.1.1.
90 ///
91 /// The spec defines `b0` as "the first input byte (or zero if
92 /// there are no bytes in this Opus frame)". The decoder sets
93 /// `rng = 128`, `val = 127 - (b0 >> 1)`, buffers the leftover bit
94 /// `(b0 & 1)`, then immediately invokes renormalization so the
95 /// invariant `rng > 2**23` holds before any symbol is decoded.
96 pub fn new(buf: &'a [u8]) -> Self {
97 let b0 = buf.first().copied().unwrap_or(0) as u32;
98 let mut dec = Self {
99 buf,
100 // §4.1.1: the first byte is consumed by initialization,
101 // so the next forward fetch starts at index 1.
102 fwd: if buf.is_empty() { 0 } else { 1 },
103 back: 0,
104 back_bits_avail: 0,
105 back_window: 0,
106 rem: b0 & 1,
107 rng: 128,
108 val: 127 - (b0 >> 1),
109 // §4.1.6: "nbits_total is initialized to 9 just before the
110 // initial range renormalization process completes."
111 nbits_total: 9,
112 nbits_raw: 0,
113 error: false,
114 };
115 dec.normalize();
116 dec
117 }
118
119 /// Whether this decoder has latched a `frame corrupt` error
120 /// somewhere in its history. Higher-level decoders use this to
121 /// abort the current frame and apply packet-loss concealment.
122 pub fn has_error(&self) -> bool {
123 self.error
124 }
125
126 /// Current whole-bit budget consumed by the range coder plus the
127 /// raw-bit reader, per RFC 6716 §4.1.6.1.
128 ///
129 /// `ec_tell` is defined as `nbits_total - ilog(rng)`. Raw bits are
130 /// added separately because §4.1.6 specifies that raw bits also
131 /// count against the total.
132 pub fn tell(&self) -> u32 {
133 // `ilog(rng)` is the position of the most-significant set bit
134 // of `rng`, counting from 1. The renormalization invariant
135 // keeps `rng >= 2**23`, so `lg` is always at least 24.
136 let lg = 32 - self.rng.leading_zeros();
137 self.nbits_total
138 .saturating_sub(lg)
139 .saturating_add(self.nbits_raw)
140 }
141
142 /// Current 1/8th-bit-precision budget consumed by the range coder
143 /// plus the raw-bit reader, per RFC 6716 §4.1.6.2.
144 ///
145 /// Follows §4.1.6.2 directly: from `lg = ilog(rng)`, extract
146 /// `r_Q15 = rng >> (lg - 16)` as a Q15 value in `[2^15, 2^16)`.
147 /// Three iterations of
148 /// `r_Q15 = (r_Q15*r_Q15) >> 15; lg = 2*lg + (r_Q15 >> 16)` extend
149 /// `lg` to 1/8th-bit precision. Raw bits add `8*nbits_raw` (whole
150 /// bits scaled into eighths). By construction,
151 /// `ec_tell() == ceil(ec_tell_frac() / 8.0)`.
152 pub fn tell_frac(&self) -> u32 {
153 let lg0 = 32 - self.rng.leading_zeros();
154 // §4.1.6.2: lg >= 24 after renormalization, so the shift below
155 // is well-defined. r_Q15 in [2^15, 2^16).
156 let mut r_q15 = self.rng >> (lg0 - 16);
157 // Build the 1/8th-bit-precision lg one bit at a time. The
158 // spec doubles `lg` on each of the three refinement passes;
159 // the accumulator starts at the whole-bit value `lg0`.
160 let mut lg_frac = lg0;
161 // Three passes yield three extra bits = 1/8th-bit precision.
162 for _ in 0..3 {
163 r_q15 = (r_q15 * r_q15) >> 15;
164 let bit = r_q15 >> 16;
165 lg_frac = 2 * lg_frac + bit;
166 // If `bit == 1`, halve r_Q15 so it falls back into
167 // [2^15, 2^16).
168 if bit == 1 {
169 r_q15 >>= 1;
170 }
171 }
172 // Final value = nbits_total*8 - lg_frac + nbits_raw*8.
173 self.nbits_total
174 .saturating_mul(8)
175 .saturating_sub(lg_frac)
176 .saturating_add(self.nbits_raw.saturating_mul(8))
177 }
178
179 /// Decode a single binary symbol with probability `2^-logp` of
180 /// being a "1", per RFC 6716 §4.1.3.2.
181 ///
182 /// Mathematically equivalent to `ec_decode(ft = 1<<logp)` followed
183 /// by `ec_dec_update(0, ft-1, ft)` (for a "0") or
184 /// `ec_dec_update(ft-1, ft, ft)` (for a "1"). The implementation
185 /// is multiply-and-divide-free: `r >> logp` replaces `rng/ft`, and
186 /// the discriminator collapses to a comparison.
187 pub fn dec_bit_logp(&mut self, logp: u32) -> u32 {
188 let r = self.rng;
189 let d = self.val;
190 // `s = r >> logp` corresponds to `rng/ft` with `ft = 1<<logp`
191 // (an exact shift when ft is a power of two).
192 let s = r >> logp;
193 // The "1" half corresponds to `fl = ft-1, fh = ft`, leading to
194 // val unchanged, rng = s.
195 // The "0" half is `fl = 0, fh = ft-1`, leading to
196 // val -= s, rng = r - s.
197 let bit = if d < s { 1 } else { 0 };
198 if bit == 1 {
199 self.rng = s;
200 } else {
201 self.val = d - s;
202 self.rng = r - s;
203 }
204 self.normalize();
205 bit
206 }
207
208 /// Decode `bits` raw bits per RFC 6716 §4.1.4.
209 ///
210 /// Raw bits are packed at the END of the frame: the least
211 /// significant bit of the first value is the LSB of the last
212 /// byte; reads proceed toward the front. The function returns the
213 /// raw bits in the order written — the LSB of the result holds
214 /// the bit the encoder emitted first.
215 ///
216 /// Returns `0` on errors (`bits > 32`); also returns zero-extended
217 /// bits past the end of the frame, matching §4.1.4's "the decoder
218 /// MUST continue to use zero for any further input bytes required".
219 pub fn dec_bits(&mut self, bits: u32) -> u32 {
220 if bits == 0 {
221 return 0;
222 }
223 if bits > 32 {
224 self.error = true;
225 return 0;
226 }
227 let mut window = self.back_window;
228 let mut avail = self.back_bits_avail;
229 // Refill the window until it holds enough bits to service the
230 // requested read.
231 while avail < bits {
232 let byte = if self.back < self.buf.len() {
233 self.buf[self.buf.len() - 1 - self.back]
234 } else {
235 // §4.1.4: zero-extend past the end of the frame.
236 0
237 };
238 self.back = self.back.saturating_add(1);
239 // Concatenate the new byte ABOVE the existing window so the
240 // intra-byte LSB-first packing is preserved.
241 window |= (byte as u32) << avail;
242 avail += 8;
243 }
244 let mask: u32 = if bits == 32 { !0 } else { (1u32 << bits) - 1 };
245 let result = window & mask;
246 // Consume the served bits.
247 self.back_window = window >> bits;
248 self.back_bits_avail = avail - bits;
249 self.nbits_raw += bits;
250 result
251 }
252
253 /// Decode one of `ft` equiprobable values in `0..ft`, per
254 /// RFC 6716 §4.1.5.
255 ///
256 /// Values of `ft <= 1` degenerate to the constant `0`. `ft` may
257 /// be as large as `2^32 - 1`. The §4.1.5 procedure splits the
258 /// value: the top 8 bits go through the range coder, the
259 /// remainder through raw bits. If the reconstructed value is
260 /// `>= ft`, the frame is corrupt — the decoder latches the error
261 /// flag and saturates to `ft - 1` per §4.1.5's concealment
262 /// recommendation.
263 pub fn dec_uint(&mut self, ft: u32) -> Result<u32, Error> {
264 if ft <= 1 {
265 return Ok(0);
266 }
267 // `ftb = ilog(ft - 1)`: number of bits needed for `ft - 1`.
268 let ftb = 32 - (ft - 1).leading_zeros();
269 if ftb <= 8 {
270 // Small case: a single range-coded symbol covers the whole
271 // value.
272 let t = self.decode(ft);
273 self.dec_update(t, t + 1, ft);
274 Ok(t)
275 } else {
276 // Large case: top 8 bits range-coded, remainder raw.
277 let split_bits = ftb - 8;
278 let top_ft = ((ft - 1) >> split_bits) + 1;
279 let t_hi = self.decode(top_ft);
280 self.dec_update(t_hi, t_hi + 1, top_ft);
281 let t_lo = self.dec_bits(split_bits);
282 let t = (t_hi << split_bits) | t_lo;
283 if t >= ft {
284 self.error = true;
285 Ok(ft - 1)
286 } else {
287 Ok(t)
288 }
289 }
290 }
291
292 /// Decode `fs` for a power-of-two `ft = 1<<ftb` per RFC 6716
293 /// §4.1.3.1 (`ec_decode_bin`).
294 ///
295 /// Mathematically equivalent to [`Self::decode`] with `ft = 1<<ftb`
296 /// but avoids the division: `rng / ft == rng >> ftb`. The caller is
297 /// expected to follow with [`Self::dec_update`] (or use
298 /// [`Self::dec_icdf`] which fuses the two steps).
299 ///
300 /// Returns `fs` in the range `[0, 1<<ftb)`.
301 pub fn decode_bin(&mut self, ftb: u32) -> u32 {
302 let s = self.rng >> ftb;
303 if s == 0 {
304 // Would only happen for ftb > ilog(rng). The
305 // renormalization invariant keeps ilog(rng) >= 24, so any
306 // practical ftb (icdf uses up to 8) is safe. Defensively
307 // saturate to 0.
308 return 0;
309 }
310 let ft = 1u32 << ftb;
311 let approx = (self.val / s).saturating_add(1);
312 ft - approx.min(ft)
313 }
314
315 /// Decode a symbol via an inverse-CDF table, per RFC 6716 §4.1.3.3
316 /// (`ec_dec_icdf`).
317 ///
318 /// `icdf[k]` stores `(1<<ftb) - fh[k]`, terminated by a `0` entry
319 /// (the implicit `fh[K_last] == ft`). `fl[0]` is implicitly 0; the
320 /// table values are strictly monotonically decreasing.
321 ///
322 /// Fuses the search step (find the smallest `k` such that
323 /// `fs < ft - icdf[k]`) with the range/value update, eliminating
324 /// the division. The renormalization loop runs before returning.
325 ///
326 /// Returns the decoded symbol index `k` in `0..icdf.len()-1`. On a
327 /// malformed table (no terminating zero), the decoder latches its
328 /// sticky error flag and returns 0.
329 pub fn dec_icdf(&mut self, icdf: &[u8], ftb: u32) -> u32 {
330 // `s` corresponds to `rng / ft` for `ft = 1<<ftb`.
331 let s = self.rng >> ftb;
332 // Forward walk: for each candidate k, compute
333 // next = s * icdf[k]
334 // which is the "remaining range above this symbol". The first
335 // k where `val >= next` is the decoded symbol. `t` tracks the
336 // previous step's `next` so that `rng' = t - next` matches the
337 // §4.1.2 update for `fl[k] == prev_next/s` and `fh[k] ==
338 // next/s` (with k=0 falling out to `rng - s*icdf[0]` since
339 // `t` starts at `rng`).
340 let mut t = self.rng;
341 for (k, &cell) in icdf.iter().enumerate() {
342 let next = s.saturating_mul(cell as u32);
343 if self.val >= next {
344 self.val -= next;
345 self.rng = t - next;
346 self.normalize();
347 return k as u32;
348 }
349 t = next;
350 }
351 // Malformed table: no terminator reached. §4.1.5 advises
352 // latching the corrupt-frame error and returning a saturated
353 // value.
354 self.error = true;
355 0
356 }
357
358 /// `ec_decode(ft)` per RFC 6716 §4.1.2 — the first of the two
359 /// symbol-decode steps.
360 ///
361 /// Computes the 16-bit symbol proxy
362 /// `fs = ft - min(val / (rng / ft) + 1, ft)`, which "lies within
363 /// the range of some symbol in the current context". The caller
364 /// then identifies the symbol `k` whose three-tuple
365 /// `(fl[k], fh[k], ft)` satisfies `fl[k] <= fs < fh[k]` and feeds
366 /// that tuple to [`Self::ec_dec_update`].
367 ///
368 /// This split form is needed when the frequency model is computed
369 /// at run time and cannot be pre-baked into a static inverse-CDF
370 /// table (RFC 6716 §4.3.2.1 coarse-energy Laplace decode, §4.3.3
371 /// allocation search). For fixed PDFs prefer [`Self::dec_icdf`],
372 /// which fuses both steps and avoids a division.
373 ///
374 /// `ft` must be in `1..=2**16` for the §4.1.2 derivation to hold
375 /// (the renormalization invariant keeps `rng > 2**23`, so
376 /// `rng / ft >= 1`). `ft == 0` would divide by zero; the decoder
377 /// latches its sticky error flag and returns `0` instead. The
378 /// returned `fs` lies in `[0, ft)`.
379 pub fn ec_decode(&mut self, ft: u32) -> u32 {
380 if ft == 0 {
381 self.error = true;
382 return 0;
383 }
384 self.decode(ft)
385 }
386
387 /// `ec_dec_update(fl, fh, ft)` per RFC 6716 §4.1.2 — the second of
388 /// the two symbol-decode steps.
389 ///
390 /// Narrows the range to the chosen symbol's `[fl, fh)` sub-interval
391 /// of `[0, ft)` per the §4.1.2 update equations, then renormalizes
392 /// to restore `rng > 2**23`. Pair this with the index returned by
393 /// the caller's search over the value from [`Self::ec_decode`].
394 ///
395 /// The three-tuple MUST satisfy `0 <= fl < fh <= ft` and
396 /// `1 <= ft <= 2**16`. A malformed tuple (`ft == 0`, `fh > ft`, or
397 /// `fl >= fh`) cannot come from a well-formed search; the decoder
398 /// latches its sticky error flag and leaves its state unchanged
399 /// rather than underflowing `val` or zeroing `rng`.
400 pub fn ec_dec_update(&mut self, fl: u32, fh: u32, ft: u32) {
401 if ft == 0 || fh > ft || fl >= fh {
402 self.error = true;
403 return;
404 }
405 self.dec_update(fl, fh, ft);
406 }
407
408 // ----- internal helpers -----
409
410 /// `ec_decode(ft)` per RFC 6716 §4.1.2: compute the symbol-proxy
411 /// `fs = ft - min(val / (rng / ft) + 1, ft)`.
412 fn decode(&mut self, ft: u32) -> u32 {
413 // The spec uses integer division. `rng/ft` is computed first;
414 // the divisor is then `val / (rng/ft)`. The renormalization
415 // invariant ensures `rng/ft >= 1` in all practical cases
416 // (rng > 2**23 and ft <= 2**16 on the symbol-decode path).
417 let s = self.rng / ft;
418 let approx = self.val / s + 1;
419 ft - approx.min(ft)
420 }
421
422 /// `ec_dec_update(fl, fh, ft)` per RFC 6716 §4.1.2.
423 ///
424 /// Narrows the range to the chosen symbol's interval, then runs
425 /// renormalization to restore `rng > 2**23`.
426 fn dec_update(&mut self, fl: u32, fh: u32, ft: u32) {
427 let s = self.rng / ft;
428 self.val -= s * (ft - fh);
429 if fl > 0 {
430 self.rng = s * (fh - fl);
431 } else {
432 self.rng -= s * (ft - fh);
433 }
434 self.normalize();
435 }
436
437 /// `ec_dec_normalize` per RFC 6716 §4.1.2.1.
438 ///
439 /// Until `rng > 2**23`, shift `rng` left by 8 and pull a fresh
440 /// `sym` byte. `sym` combines the previously-buffered low bit
441 /// (`rem`, as MSB) with the top 7 bits of the new byte; the LSB of
442 /// the new byte is buffered for next time. When the frame is
443 /// exhausted, zero bytes are substituted.
444 fn normalize(&mut self) {
445 while self.rng <= Self::RNG_MIN {
446 let byte = if self.fwd < self.buf.len() {
447 let b = self.buf[self.fwd];
448 self.fwd += 1;
449 b as u32
450 } else {
451 0
452 };
453 let sym = (self.rem << 7) | (byte >> 1);
454 self.rem = byte & 1;
455 self.rng <<= 8;
456 self.val = ((self.val << 8) + (255 - sym)) & 0x7FFF_FFFF;
457 // §4.1.6: each iteration adds 8 to nbits_total.
458 self.nbits_total = self.nbits_total.saturating_add(8);
459 }
460 }
461}
462
463#[cfg(test)]
464mod tests {
465 use super::*;
466
467 /// §4.1.1 initialization over an empty buffer must still satisfy
468 /// the §4.1.2.1 invariant and report `ec_tell() == 1`
469 /// (§4.1.6.1: "In a newly initialized decoder, before any symbols
470 /// have been read, this reports that 1 bit has been used").
471 #[test]
472 fn init_empty_buffer_satisfies_invariant() {
473 let dec = RangeDecoder::new(&[]);
474 assert!(dec.rng > RangeDecoder::RNG_MIN);
475 assert!(!dec.has_error());
476 assert_eq!(dec.tell(), 1);
477 }
478
479 /// Non-empty initialization also satisfies the invariant and
480 /// reports a sensible tell.
481 #[test]
482 fn init_nonempty_buffer_holds_invariant() {
483 let dec = RangeDecoder::new(&[0xAB, 0xCD, 0xEF, 0x12]);
484 assert!(dec.rng > RangeDecoder::RNG_MIN);
485 assert!(!dec.has_error());
486 assert!(dec.tell() >= 1);
487 }
488
489 /// `dec_bit_logp` should be statistically biased by the surrounding
490 /// bytes: an all-zero stream pushes `val` high, biasing toward "0",
491 /// and an all-ones stream pushes it low, biasing toward "1".
492 #[test]
493 fn dec_bit_logp_bias_with_extreme_inputs() {
494 // All-zero stream: bias toward "0".
495 let mut dec0 = RangeDecoder::new(&[0u8; 16]);
496 let mut zero_count = 0;
497 for _ in 0..32 {
498 if dec0.dec_bit_logp(1) == 0 {
499 zero_count += 1;
500 }
501 }
502 assert!(!dec0.has_error());
503 assert!(
504 zero_count > 16,
505 "all-zero stream should be biased toward 0: zero_count={}",
506 zero_count
507 );
508
509 // All-ones stream: bias toward "1".
510 let mut dec1 = RangeDecoder::new(&[0xFFu8; 16]);
511 let mut one_count = 0;
512 for _ in 0..32 {
513 if dec1.dec_bit_logp(1) == 1 {
514 one_count += 1;
515 }
516 }
517 assert!(!dec1.has_error());
518 assert!(
519 one_count > 16,
520 "all-ones stream should be biased toward 1: one_count={}",
521 one_count
522 );
523 }
524
525 /// `dec_bits` reads raw bits LSB-first from the END of the buffer.
526 /// With the last byte = 0b1010_0110, the first 4 raw bits returned
527 /// are 0b0110 = 6, and the next 4 are 0b1010 = 0xA.
528 #[test]
529 fn dec_bits_lsb_first_from_end() {
530 let mut dec = RangeDecoder::new(&[0x00, 0x00, 0xA6]);
531 let lo = dec.dec_bits(4);
532 let hi = dec.dec_bits(4);
533 assert_eq!(lo, 0x6);
534 assert_eq!(hi, 0xA);
535 assert!(!dec.has_error());
536 }
537
538 /// `dec_bits` past the end of the frame must zero-extend, per
539 /// §4.1.4 ("the decoder MUST continue to use zero for any further
540 /// input bytes required"). The function must not panic or set the
541 /// error flag in that case.
542 #[test]
543 fn dec_bits_zero_past_end_of_frame() {
544 let mut dec = RangeDecoder::new(&[0xFF, 0xFF]);
545 for _ in 0..4 {
546 let v = dec.dec_bits(4);
547 assert_eq!(v, 0xF);
548 }
549 // The next 8 bits should come back as zero (the range coder
550 // may or may not have shared bytes with the raw reader — but
551 // the *raw* side reads past-EOF as 0).
552 let pad = dec.dec_bits(8);
553 let _ = pad;
554 assert!(!dec.has_error());
555 }
556
557 /// `dec_uint(1)` is degenerate — the only value in `0..1` is 0 —
558 /// and consumes no bits.
559 #[test]
560 fn dec_uint_ft_one_is_zero_no_consumption() {
561 let mut dec = RangeDecoder::new(&[0x12, 0x34, 0x56]);
562 let before = dec.tell();
563 let v = dec.dec_uint(1).expect("ft=1 must succeed");
564 let after = dec.tell();
565 assert_eq!(v, 0);
566 assert_eq!(after, before);
567 }
568
569 /// `dec_uint` with `ft` in the small (`ftb <= 8`) regime: returned
570 /// values must lie in `[0, ft)` and never trip the error flag for
571 /// well-formed inputs.
572 #[test]
573 fn dec_uint_small_ft_in_range() {
574 let mut dec = RangeDecoder::new(&[0x42, 0x18, 0xC3, 0x7F]);
575 for _ in 0..8 {
576 let v = dec.dec_uint(200).expect("ft=200 must succeed");
577 assert!(v < 200, "v={} out of range", v);
578 }
579 assert!(!dec.has_error());
580 }
581
582 /// `dec_uint` with `ft` in the large (`ftb > 8`) regime: returned
583 /// values must lie in `[0, ft)`. The saturation path is allowed
584 /// to set the error flag, but the returned value remains bounded.
585 #[test]
586 fn dec_uint_large_ft_in_range() {
587 let buf: Vec<u8> = (0..64).collect();
588 let mut dec = RangeDecoder::new(&buf);
589 for _ in 0..8 {
590 let v = dec.dec_uint(1_000_000).expect("ft=1_000_000 must succeed");
591 assert!(v < 1_000_000, "v={} out of range", v);
592 }
593 }
594
595 /// `dec_uint` with `ft = 0` is degenerate and returns 0 without
596 /// consumption.
597 #[test]
598 fn dec_uint_ft_zero_returns_zero() {
599 let mut dec = RangeDecoder::new(&[0xAA, 0xBB, 0xCC, 0xDD]);
600 let before = dec.tell();
601 let v = dec.dec_uint(0).expect("ft=0 must succeed");
602 assert_eq!(v, 0);
603 assert_eq!(dec.tell(), before);
604 }
605
606 /// `tell()` must monotonically non-decrease across operations.
607 #[test]
608 fn tell_is_monotonic_across_decodes() {
609 let mut dec = RangeDecoder::new(&[0x55; 8]);
610 let mut prev = dec.tell();
611 for _ in 0..16 {
612 let _ = dec.dec_bit_logp(2);
613 let now = dec.tell();
614 assert!(now >= prev, "tell() went backwards: {} -> {}", prev, now);
615 prev = now;
616 }
617 }
618
619 /// `decode_bin(ftb)` must agree with the generic `decode(1<<ftb)`
620 /// path bit-for-bit (RFC 6716 §4.1.3.1: the two are mathematically
621 /// equivalent). Drive both with the same input bytes and compare.
622 #[test]
623 fn decode_bin_matches_generic_decode() {
624 for &ftb in &[1u32, 4, 8, 12, 15] {
625 let buf = [0x37u8, 0x91, 0xC4, 0x18, 0xA2, 0x5D, 0x6E, 0xFF];
626 let mut a = RangeDecoder::new(&buf);
627 let mut b = RangeDecoder::new(&buf);
628 let from_bin = a.decode_bin(ftb);
629 let from_generic = b.decode(1u32 << ftb);
630 assert_eq!(
631 from_bin, from_generic,
632 "decode_bin({ftb}) != decode(1<<{ftb})"
633 );
634 assert!(from_bin < (1u32 << ftb), "fs={from_bin} out of range");
635 }
636 }
637
638 /// RFC 6716 §4.1.6.1 specifies the identity
639 /// `ec_tell() == ceil(ec_tell_frac() / 8.0)`. Walk a decoder
640 /// forward through mixed symbol and raw-bit reads and assert this
641 /// at every step.
642 #[test]
643 fn tell_frac_consistent_with_tell() {
644 let mut dec = RangeDecoder::new(&[0xA3, 0x7F, 0x10, 0x5C, 0xE8, 0x91, 0x42, 0xB7]);
645 // §4.1.6.1: a fresh decoder reports tell() == 1.
646 assert_eq!(dec.tell(), 1);
647 for _ in 0..12 {
648 let whole = dec.tell();
649 let frac = dec.tell_frac();
650 let ceil_eighths = frac.div_ceil(8);
651 assert_eq!(
652 ceil_eighths, whole,
653 "tell()={whole} != ceil(tell_frac()={frac} / 8)={ceil_eighths}"
654 );
655 let _ = dec.dec_bit_logp(1);
656 let _ = dec.dec_bits(2);
657 }
658 }
659
660 /// `tell_frac()` of a fresh decoder sits in `[1, 8]` (since
661 /// `tell()` is `1` and the §4.1.6.1 ceiling identity holds).
662 #[test]
663 fn tell_frac_initial_within_one_bit() {
664 let dec = RangeDecoder::new(&[0xCC, 0xDD, 0xEE, 0xFF]);
665 let frac = dec.tell_frac();
666 assert!(
667 (1..=8).contains(&frac),
668 "tell_frac initial out of [1,8]: {frac}"
669 );
670 assert!(frac.div_ceil(8) == dec.tell());
671 }
672
673 /// `dec_icdf` over a binary `{ft - 1, 1}/ft` distribution must
674 /// agree with `dec_bit_logp(logp)` step-for-step — both are
675 /// special cases of `ec_decode` with `ft = 1<<ftb` (RFC 6716
676 /// §4.1.3.2 + §4.1.3.3).
677 #[test]
678 fn dec_icdf_matches_dec_bit_logp_for_binary() {
679 let buf = [0xDE, 0xAD, 0xBE, 0xEF, 0x10, 0x32, 0x54, 0x76];
680 // logp = 3 → ft = 8, P("1") = 1/8. icdf {ft-fh[0], ft-fh[1]} =
681 // {1, 0}: symbol 0 is the high-probability outcome (the "0"
682 // bit).
683 let logp = 3u32;
684 let icdf = [1u8, 0];
685 let mut a = RangeDecoder::new(&buf);
686 let mut b = RangeDecoder::new(&buf);
687 for _ in 0..16 {
688 let via_logp = a.dec_bit_logp(logp);
689 let via_icdf = b.dec_icdf(&icdf, logp);
690 assert_eq!(
691 via_logp, via_icdf,
692 "dec_bit_logp({logp}) != dec_icdf({icdf:?}, {logp})"
693 );
694 }
695 assert!(!a.has_error() && !b.has_error());
696 }
697
698 /// `dec_icdf` over a uniform `{1,1,1,1,1,1,1,1}/8` PDF must return
699 /// a symbol in `[0, 8)` every time without error.
700 #[test]
701 fn dec_icdf_uniform_returns_in_range() {
702 // Uniform 8-way PDF: fh = {1,2,3,4,5,6,7,8} → icdf =
703 // {7,6,5,4,3,2,1,0}.
704 let icdf = [7u8, 6, 5, 4, 3, 2, 1, 0];
705 let mut dec = RangeDecoder::new(&[0x42, 0x18, 0xC3, 0x7F, 0x55, 0xAA, 0x33, 0xCC]);
706 for _ in 0..16 {
707 let k = dec.dec_icdf(&icdf, 3);
708 assert!(k < 8, "icdf uniform returned {k} out of [0, 8)");
709 }
710 assert!(!dec.has_error());
711 }
712
713 /// `dec_icdf` over the degenerate single-symbol table `{0}` (only
714 /// the terminator) means symbol 0 covers the whole interval, so it
715 /// is always returned. No range mass is consumed and the error
716 /// flag stays clear.
717 #[test]
718 fn dec_icdf_single_symbol_always_zero() {
719 let icdf = [0u8];
720 let mut dec = RangeDecoder::new(&[0x77, 0x33, 0x11, 0xAA]);
721 let before_tell = dec.tell();
722 for _ in 0..4 {
723 let k = dec.dec_icdf(&icdf, 3);
724 assert_eq!(k, 0);
725 }
726 assert!(dec.tell() >= before_tell);
727 assert!(!dec.has_error());
728 }
729
730 /// `tell_frac()` is monotonically non-decreasing across mixed ops
731 /// (§4.1.6.2 inherits the monotonicity of `ec_tell` since the
732 /// procedure only adds bits).
733 #[test]
734 fn tell_frac_is_monotonic() {
735 let mut dec = RangeDecoder::new(&[0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88]);
736 // Uniform 8-way icdf so each call burns ~3 bits.
737 let icdf = [7u8, 6, 5, 4, 3, 2, 1, 0];
738 let mut prev = dec.tell_frac();
739 for i in 0..24 {
740 match i % 3 {
741 0 => {
742 let _ = dec.dec_bit_logp(2);
743 }
744 1 => {
745 let _ = dec.dec_icdf(&icdf, 3);
746 }
747 _ => {
748 let _ = dec.dec_bits(2);
749 }
750 }
751 let now = dec.tell_frac();
752 assert!(
753 now >= prev,
754 "tell_frac() went backwards: {} -> {}",
755 prev,
756 now
757 );
758 prev = now;
759 }
760 }
761
762 /// `dec_bits(0)` returns 0 and consumes nothing.
763 #[test]
764 fn dec_bits_zero_width_is_noop() {
765 let mut dec = RangeDecoder::new(&[0x12, 0x34, 0x56]);
766 let before = dec.tell();
767 let v = dec.dec_bits(0);
768 assert_eq!(v, 0);
769 assert_eq!(dec.tell(), before);
770 assert!(!dec.has_error());
771 }
772
773 /// `dec_bits` with an over-large width sets the error flag and
774 /// returns 0 (guard against caller misuse).
775 #[test]
776 fn dec_bits_oversize_latches_error() {
777 let mut dec = RangeDecoder::new(&[0xAA, 0xBB, 0xCC, 0xDD]);
778 let v = dec.dec_bits(33);
779 assert_eq!(v, 0);
780 assert!(dec.has_error());
781 }
782
783 /// The public two-step `ec_decode` / `ec_dec_update` path must
784 /// reproduce, symbol for symbol, what the fused `dec_icdf` produces
785 /// for the same fixed PDF. This is the RFC 6716 §4.1.2 ↔ §4.1.3.3
786 /// equivalence: `dec_icdf` is exactly `ec_decode(1<<ftb)` followed
787 /// by a search and `ec_dec_update`. We drive both decoders over
788 /// identical input bytes and assert they stay in lockstep.
789 #[test]
790 fn ec_decode_update_matches_dec_icdf_for_fixed_pdf() {
791 // Uniform 8-way PDF: fh = {1,2,..,8}, ft = 8, icdf = {7,..,0}.
792 let icdf = [7u8, 6, 5, 4, 3, 2, 1, 0];
793 let ftb = 3u32;
794 let ft = 1u32 << ftb;
795 let buf = [0x42u8, 0x18, 0xC3, 0x7F, 0x55, 0xAA, 0x33, 0xCC];
796 let mut fused = RangeDecoder::new(&buf);
797 let mut split = RangeDecoder::new(&buf);
798 for _ in 0..16 {
799 let k_fused = fused.dec_icdf(&icdf, ftb);
800
801 // Reconstruct the same decode via the public split steps.
802 let fs = split.ec_decode(ft);
803 // icdf[k] == ft - fh[k]; fl[k] == fh[k-1] (fl[0] == 0).
804 // Find the symbol whose [fl, fh) contains fs.
805 let mut k_split = 0u32;
806 let mut fl = 0u32;
807 let mut fh = ft - icdf[0] as u32;
808 for (idx, w) in icdf.windows(2).enumerate() {
809 if fs < fh {
810 break;
811 }
812 fl = ft - w[0] as u32;
813 fh = ft - w[1] as u32;
814 k_split = (idx + 1) as u32;
815 }
816 split.ec_dec_update(fl, fh, ft);
817
818 assert_eq!(
819 k_fused, k_split,
820 "fused dec_icdf and split ec_decode/ec_dec_update diverged"
821 );
822 }
823 assert!(!fused.has_error() && !split.has_error());
824 }
825
826 /// `ec_decode(ft)` returns a value in `[0, ft)` for every well-formed
827 /// `ft`, matching the §4.1.2 derivation `fs = ft - min(.., ft)`.
828 #[test]
829 fn ec_decode_returns_in_range() {
830 let buf = [0x37u8, 0x91, 0xC4, 0x18, 0xA2, 0x5D, 0x6E, 0xFF];
831 for &ft in &[2u32, 7, 100, 1000, 1 << 16] {
832 let mut dec = RangeDecoder::new(&buf);
833 let fs = dec.ec_decode(ft);
834 assert!(fs < ft, "ec_decode({ft}) = {fs} out of [0, {ft})");
835 assert!(!dec.has_error());
836 }
837 }
838
839 /// `ec_decode(0)` is malformed (division by zero in the §4.1.2
840 /// formula); it must latch the error flag and return 0 rather than
841 /// panic.
842 #[test]
843 fn ec_decode_ft_zero_latches_error() {
844 let mut dec = RangeDecoder::new(&[0x11, 0x22, 0x33, 0x44]);
845 let fs = dec.ec_decode(0);
846 assert_eq!(fs, 0);
847 assert!(dec.has_error());
848 }
849
850 /// `ec_dec_update` with a malformed tuple (`fl >= fh`, `fh > ft`,
851 /// or `ft == 0`) latches the error flag and leaves state untouched,
852 /// guarding against an underflow of `val` or a zeroing of `rng`.
853 #[test]
854 fn ec_dec_update_rejects_malformed_tuple() {
855 // fl >= fh
856 let mut a = RangeDecoder::new(&[0xAA, 0xBB, 0xCC, 0xDD]);
857 a.ec_dec_update(4, 4, 8);
858 assert!(a.has_error());
859
860 // fh > ft
861 let mut b = RangeDecoder::new(&[0xAA, 0xBB, 0xCC, 0xDD]);
862 b.ec_dec_update(0, 9, 8);
863 assert!(b.has_error());
864
865 // ft == 0
866 let mut c = RangeDecoder::new(&[0xAA, 0xBB, 0xCC, 0xDD]);
867 c.ec_dec_update(0, 1, 0);
868 assert!(c.has_error());
869 }
870
871 /// The public split path must also reproduce the `dec_uint` small
872 /// regime (`ftb <= 8`), which is internally `decode(ft)` followed by
873 /// `dec_update(t, t+1, ft)`. Drive `dec_uint` and the public
874 /// `ec_decode` / `ec_dec_update` in lockstep for a small `ft`.
875 #[test]
876 fn ec_decode_update_matches_dec_uint_small_regime() {
877 let buf = [0x42u8, 0x18, 0xC3, 0x7F, 0x55, 0xAA, 0x33, 0xCC];
878 let ft = 200u32; // ftb <= 8 → small dec_uint regime
879 let mut via_uint = RangeDecoder::new(&buf);
880 let mut via_split = RangeDecoder::new(&buf);
881 for _ in 0..8 {
882 let u = via_uint.dec_uint(ft).expect("ft=200 small regime");
883 let t = via_split.ec_decode(ft);
884 via_split.ec_dec_update(t, t + 1, ft);
885 assert_eq!(u, t, "dec_uint and split ec_decode/update diverged");
886 }
887 assert!(!via_uint.has_error() && !via_split.has_error());
888 }
889}