1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
use crate::bit_io::{BitReader, BitReaderReversed};
use crate::cpu_kernel::CpuKernel;
use crate::decoding::errors::{FSEDecoderError, FSETableError};
use alloc::vec::Vec;
pub struct FSEDecoder<'table> {
/// An FSE state value represents an index in the FSE table.
pub state: Entry,
/// A reference to the table used for decoding.
table: &'table FSETable,
}
impl<'t> FSEDecoder<'t> {
/// Initialize a new Finite State Entropy decoder.
pub fn new(table: &'t FSETable) -> FSEDecoder<'t> {
FSEDecoder {
state: table.decode.first().copied().unwrap_or(Entry {
new_state: 0,
symbol: 0,
num_bits: 0,
base_value: 0,
num_additional_bits: 0,
}),
table,
}
}
/// Returns the byte associated with the symbol the internal cursor is pointing at.
pub fn decode_symbol(&self) -> u8 {
self.state.symbol
}
/// Initialize internal state and prepare for decoding. After this, `decode_symbol` can be called
/// to read the first symbol and `update_state` can be called to prepare to read the next symbol.
pub fn init_state<K: CpuKernel>(
&mut self,
bits: &mut BitReaderReversed<'_, K>,
) -> Result<(), FSEDecoderError> {
if self.table.accuracy_log == 0 {
return Err(FSEDecoderError::TableIsUninitialized);
}
// Defense-in-depth internal-invariant guard: in normal builds
// `crate::fse` is not externally reachable, but malformed
// tables can still arise from internal misuse, future
// `feature = "fuzz_exports"`. Validate up-front that
// `decode.len() == 1 << accuracy_log` and surface a typed
// `InvalidTableShape` error (distinct from
// `TableIsUninitialized` to keep error triage unambiguous).
// Without this, `read_entry`'s unchecked indexing (the
// `cfg(not(fuzz_exports))` arm) could hit UB on a malformed
// table in release builds. `checked_shl` covers the
// pathological case where `accuracy_log >= usize::BITS`.
// Branch cost is a single per-call check; the per-sequence
// hot path (`update_state_fast`) is unaffected.
let accuracy_log = self.table.accuracy_log;
let decode_len = self.table.decode.len();
let expected =
1usize
.checked_shl(accuracy_log.into())
.ok_or(FSEDecoderError::InvalidTableShape {
decode_len,
accuracy_log,
})?;
if decode_len != expected {
return Err(FSEDecoderError::InvalidTableShape {
decode_len,
accuracy_log,
});
}
let new_state = bits.get_bits(self.table.accuracy_log);
// SAFETY: `accuracy_log` bits read from the bitstream produce
// `new_state < (1 << accuracy_log) = table_size = decode.len()`.
// `build_decoding_table` ensures the table is sized exactly
// `1 << accuracy_log` entries. The bounds check that the
// checked indexing would emit is provably redundant. Under
// `feature = "fuzz_exports"` `read_entry` falls back to the
// bounds-checked path — see comment on `read_entry`.
self.state = self.read_entry(new_state as usize);
Ok(())
}
/// Advance the internal state to decode the next symbol in the bitstream.
///
/// # Panics
///
/// Panics if called on an `FSEDecoder` whose backing `FSETable` has
/// not been built yet (empty `decode` vec). `FSEDecoder::new`
/// produces such a decoder with a zero-default `state`; the
/// well-behaved pipeline is `new` → `init_state` → `update_state*`,
/// and `init_state` returns `Err` on an uninitialized table. This
/// assertion converts what would otherwise be UB (from the
/// unchecked indexing in `read_entry`) into a clear fail-fast
/// panic that surfaces the API misuse immediately instead of
/// leaving the bitstream and decode state silently desynchronised.
pub fn update_state<K: CpuKernel>(&mut self, bits: &mut BitReaderReversed<'_, K>) {
// Public-API safety guard: `FSEDecoder::new` builds a decoder
// with a zero-default `state` (Entry { new_state: 0, num_bits:
// 0, symbol: 0 }) regardless of whether the table was actually
// populated. A caller that constructs the decoder and then
// calls `update_state` BEFORE a successful `init_state` would
// hit `read_entry(0)` → `get_unchecked(0)` on an empty
// `decode` vec — UB in release mode, since `debug_assert!` is
// stripped. Fail-fast with `assert!` instead of silently
// returning so that misuse surfaces immediately rather than
// leaving the bitstream advanced by some bits but the decode
// state stuck at the zero-default Entry — a corruption mode
// that the caller has no way to diagnose. The well-behaved
// decode pipeline always pairs `new` → `init_state` →
// `update_state*`, so this branch is strongly biased "not
// taken" and the predictor amortises it to zero cost on the
// hot path. The corresponding `update_state_fast` is
// `pub(crate)` with controlled callers, so it relies on the
// documented precondition instead of paying for a per-call
// check.
assert!(
!self.table.decode.is_empty(),
concat!(
"FSEDecoder::update_state called on an uninitialized table; ",
"call init_state successfully before any update_state* call",
),
);
let num_bits = self.state.num_bits;
let add = bits.get_bits(num_bits);
let next_state = usize::from(self.state.new_state) + add as usize;
// SAFETY: same invariant as `update_state_fast` below —
// `new_state` and `num_bits` were paired by
// `calc_baseline_and_numbits` during table construction such
// that `new_state + (1 << num_bits) - 1 < table_size =
// decode.len()`. `add < 1 << num_bits` by definition of the
// `num_bits`-wide read, so `next_state < decode.len()`.
self.state = self.read_entry(next_state);
}
/// Read `decode[idx]` — bounds-checked under `fuzz_exports`, unchecked
/// otherwise. The call sites all hold the FSE invariant `idx <
/// decode.len()` by construction (`init_state` reads
/// `accuracy_log` bits, `update_state*` derive `next_state` from
/// `Entry.new_state + add` where `calc_baseline_and_numbits`
/// guarantees `new_state + (1 << num_bits) - 1 < table_size`).
/// Under `fuzz_exports` external code can construct a mis-shaped
/// table that violates the invariant — fall back to checked
/// indexing so a fuzz harness sees a panic rather than UB, even
/// when the fuzz binary is built in release mode (which makes
/// `debug_assert!` a no-op and is the default for `cargo fuzz`).
#[inline(always)]
fn read_entry(&self, idx: usize) -> Entry {
#[cfg(feature = "fuzz_exports")]
{
self.table.decode[idx]
}
#[cfg(not(feature = "fuzz_exports"))]
// SAFETY: see comments at the individual call sites — `idx` is
// invariant-bounded by the FSE table-build / state-transition
// contract. LLVM cannot prove this on its own because the
// invariant spans `build_decoding_table` and decode call sites.
unsafe {
*self.table.decode.get_unchecked(idx)
}
}
/// Advance the internal state **without** an individual refill check.
///
/// # Preconditions (caller-enforced)
///
/// 1. **Bit budget:** enough bits MUST be available in the bit
/// reader (e.g. via [`BitReaderReversed::ensure_bits`] with a
/// budget that covers this and any other unchecked reads in the
/// same batch).
/// 2. **State initialisation:** [`init_state`] MUST have returned
/// `Ok` on this decoder before any `update_state_fast` call.
/// Calling `update_state_fast` on a fresh `FSEDecoder::new`
/// output (which holds a zero-default `state` and may reference
/// an empty `decode` vec) would resolve to
/// `read_entry(0).get_unchecked(0)` on an empty slice — UB.
/// The empty-table guard in [`update_state`] is intentionally
/// omitted here to keep the per-sequence fast path branch-free;
/// the only call site (`decode_and_execute_sequences`) always
/// succeeds `init_state` before entering the per-sequence loop,
/// so the precondition holds by construction.
///
/// This is the "fast path" used in the interleaved sequence decode loop
/// where a single refill check covers all three FSE state updates.
///
/// [`init_state`]: Self::init_state
/// [`update_state`]: Self::update_state
#[inline(always)]
pub(crate) fn update_state_fast<K: CpuKernel>(&mut self, bits: &mut BitReaderReversed<'_, K>) {
let num_bits = self.state.num_bits;
let add = bits.get_bits_unchecked(num_bits);
let next_state = usize::from(self.state.new_state) + add as usize;
// SAFETY: `new_state` and `num_bits` were paired by
// `calc_baseline_and_numbits` during table construction such that
// `new_state + (2.pow(num_bits) - 1) < table_size = self.table.decode.len()`.
// `add` is the value of `num_bits` bits read from the bitstream, so
// `add < 2.pow(num_bits)` by construction of `BitReaderReversed::get_bits_unchecked`.
// Therefore `next_state < self.table.decode.len()` and the indexed read
// is in bounds; LLVM cannot prove this invariant on its own because it
// spans the table-build and decode call sites. Under
// `feature = "fuzz_exports"` `read_entry` falls back to bounds-checked
// indexing — see comment on `read_entry`.
self.state = self.read_entry(next_state);
}
}
/// FSE decoding involves a decoding table that describes the probabilities of
/// all literals from 0 to the highest present one
///
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#fse-table-description>
#[derive(Debug, Clone)]
pub struct FSETable {
/// The maximum symbol in the table (inclusive). Limits the probabilities length to max_symbol + 1.
max_symbol: u8,
/// The actual table containing the decoded symbol and the compression data
/// connected to that symbol.
pub decode: Vec<Entry>, //used to decode symbols, and calculate the next state
/// Reused scratch buffer for symbol spreading to avoid per-build allocations.
symbol_spread_buffer: Vec<u8>,
/// The size of the table is stored in logarithm base 2 format,
/// with the **size of the table** being equal to `(1 << accuracy_log)`.
/// This value is used so that the decoder knows how many bits to read from the bitstream.
pub accuracy_log: u8,
/// In this context, probability refers to the likelihood that a symbol occurs in the given data.
/// Given this info, the encoder can assign shorter codes to symbols that appear more often,
/// and longer codes that appear less often, then the decoder can use the probability
/// to determine what code was assigned to what symbol.
///
/// The probability of a single symbol is a value representing the proportion of times the symbol
/// would fall within the data.
///
/// If a symbol probability is set to `-1`, it means that the probability of a symbol
/// occurring in the data is less than one.
pub symbol_probabilities: Vec<i32>, //used while building the decode Vector
}
impl FSETable {
/// Initialize a new empty Finite State Entropy decoding table.
pub fn new(max_symbol: u8) -> FSETable {
FSETable {
max_symbol,
symbol_probabilities: Vec::with_capacity(256), //will never be more than 256 symbols because u8
symbol_spread_buffer: Vec::new(),
decode: Vec::new(), //depending on acc_log.
accuracy_log: 0,
}
}
/// Reset `self` and update `self`'s state to mirror the provided table.
pub fn reinit_from(&mut self, other: &Self) {
self.reset();
self.symbol_probabilities
.extend_from_slice(&other.symbol_probabilities);
self.symbol_spread_buffer
.reserve(other.symbol_spread_buffer.len());
self.decode.extend_from_slice(&other.decode);
self.accuracy_log = other.accuracy_log;
}
/// Empty the table and clear all internal state.
pub fn reset(&mut self) {
self.symbol_probabilities.clear();
self.symbol_spread_buffer.clear();
self.decode.clear();
self.accuracy_log = 0;
}
/// Build the equivalent encoder-side table from a parsed decoder table.
pub(crate) fn to_encoder_table(&self) -> Option<crate::fse::fse_encoder::FSETable> {
if self.accuracy_log == 0 || self.symbol_probabilities.is_empty() {
return None;
}
Some(crate::fse::fse_encoder::build_table_from_probabilities(
&self.symbol_probabilities,
self.accuracy_log,
))
}
/// returns how many BYTEs (not bits) were read while building the decoder
pub fn build_decoder(&mut self, source: &[u8], max_log: u8) -> Result<usize, FSETableError> {
let max_log = max_log.min(ENTRY_MAX_ACCURACY_LOG);
self.accuracy_log = 0;
let bytes_read = self.read_probabilities(source, max_log)?;
self.build_decoding_table()?;
Ok(bytes_read)
}
/// Populate each entry's `base_value` and `num_additional_bits`
/// from a packed code-metadata table. The packed format matches
/// the donor-style `(baseline << 0) | (extra_bits << 24)` layout
/// used by the sequence section's `LL_META` / `ML_META`
/// constants. Call site indexes into `packed` by entry symbol.
///
/// MUST be called after `build_decoding_table` for tables whose
/// per-sequence decode path expects pre-computed metadata (LL /
/// ML sub-tables of the sequence section). Tables that don't
/// (HUF-weight stream) skip the call and leave the new fields
/// at their zero default.
///
/// Symbols outside `packed.len()` are treated as a corrupt
/// table — both fields stay at 0 for that entry. Upstream
/// `build_decoding_table` already caps symbols at
/// `max_symbol + 1`, so this branch is reachable only on
/// `feature = "fuzz_exports"` builds where external code can
/// stuff arbitrary entries; the safe default keeps the decode
/// hot path from observing UB even in that contrived case.
pub(crate) fn enrich_with_packed_seq_meta(&mut self, packed: &[u32]) {
for entry in self.decode.iter_mut() {
let idx = entry.symbol as usize;
if idx < packed.len() {
let meta = packed[idx];
entry.base_value = meta & 0x00FF_FFFF;
entry.num_additional_bits = (meta >> 24) as u8;
} else {
// Out-of-range symbol — reachable only on
// `feature = "fuzz_exports"` builds where external
// code can stuff arbitrary entries into the table.
// Explicitly zero both fields so the next decode
// pass observes a clean state instead of stale
// metadata from a previous (well-formed) enrich
// call. The downstream `decode_one_sequence_inline`
// hot path still surfaces the corruption via the
// existing bitstream checks; this clears prior
// values rather than introducing a new fast-fail.
entry.base_value = 0;
entry.num_additional_bits = 0;
}
}
}
/// Populate each entry's `base_value` and `num_additional_bits`
/// for the sequence section's offset table.
///
/// Offset codes follow a different shape than LL / ML: each
/// offset code `c` decodes to `base = 1 << c`, with `c` extra
/// bits read from the stream. No external meta table — the
/// computation is closed-form on the symbol value.
///
/// Per the format spec, valid offset codes are 0..=31 (offsets
/// up to 2³¹). Larger symbols would overflow the `1 << c` shift
/// — the entry's fields stay at zero so the decode path
/// surfaces the corruption downstream instead of producing a
/// wraparound shift here.
pub(crate) fn enrich_for_offsets(&mut self) {
for entry in self.decode.iter_mut() {
// Reset before the bound check so out-of-range
// symbols clear stale metadata instead of carrying it
// over from a previous enrich pass.
entry.base_value = 0;
entry.num_additional_bits = 0;
let code = entry.symbol;
if code < 32 {
entry.base_value = 1u32 << code;
entry.num_additional_bits = code;
}
}
}
/// Given the provided accuracy log, build a decoding table from that log.
pub fn build_from_probabilities(
&mut self,
acc_log: u8,
probs: &[i32],
) -> Result<(), FSETableError> {
if acc_log == 0 {
return Err(FSETableError::AccLogIsZero);
}
if acc_log > ENTRY_MAX_ACCURACY_LOG {
return Err(FSETableError::AccLogTooBig {
got: acc_log,
max: ENTRY_MAX_ACCURACY_LOG,
});
}
// Probability sum check: `build_decoding_table` assumes the
// sum of positive probabilities plus the count of `-1`
// entries (each contributing one slot at the top of the
// table) equals exactly `1 << acc_log`. Without this guard
// the wire-format `parse_wire` path validates the sum
// upstream, but callers entering through
// `build_from_probabilities` directly (the Predefined cache
// and any fuzz / external user) would silently produce a
// table where `calc_baseline_and_numbits` is given
// `symbol_count` values exceeding the symbol's actual
// `prob`, yielding `new_state` / `num_bits` pairs that can
// overshoot `decode.len()` on the unchecked `read_entry`
// hot path. Surface as a typed error so the caller can
// distinguish a malformed input from an internal failure.
// Strict probability range validation: RFC 8878 §4.1.1 admits
// only `{-1, 0, 1..=table_size}` as probability values. The
// wire-format parser never emits anything else, but
// `build_from_probabilities` is a public entry point reachable
// from fuzz harnesses and external users — leaving the gate
// open invites two attack shapes:
//
// 1. Silent malformed table: clamping `p < -1` to 0 (the
// previous `p.max(0)`) lets `[-2, ...]` satisfy a sum
// check whose remaining terms happen to add up to
// `table_size`, producing a quietly broken table.
// 2. DoS via probability-sum overflow: `[i32::MAX, i32::MAX,
// 0x42] as u32` wraps to `0x40 = 64 = 1 << 6`, satisfies
// the sum check, then `build_decoding_table` runs
// `for _ in 0..prob` against `prob = i32::MAX`, looping
// 2^31-1 times per such symbol (worst case: spread-array
// out-of-bounds panic, best case: minutes of CPU).
//
// Reject any `p > table_size` upfront so the subsequent u32
// sum is bounded (see the sum's own comment for the
// wrap-impossibility argument).
let table_size = 1u32 << acc_log;
for &p in probs {
if p < -1 || p > table_size as i32 {
return Err(FSETableError::InvalidProbability {
value: p,
table_size,
accuracy_log: acc_log,
});
}
}
// Sum the validated probs in u32. Per-element validation
// above bounds each non-`-1` value by `table_size <= 1 << 16`,
// and `probs.len() <= 256`, so the worst-case sum
// `256 * 65536 = 16M` fits u32 with 8 bits of headroom — no
// wrap possible. Keeps the public
// `ProbabilityCounterMismatch.got` field at u32 (no public
// API break).
let probability_sum: u32 = probs
.iter()
.map(|&p| if p == -1 { 1u32 } else { p as u32 })
.sum();
if probability_sum != table_size {
return Err(FSETableError::ProbabilityCounterMismatch {
got: probability_sum,
expected_sum: table_size,
symbol_probabilities: probs.to_vec(),
});
}
self.symbol_probabilities = probs.to_vec();
self.accuracy_log = acc_log;
self.build_decoding_table()
}
/// Build the actual decoding table after probabilities have been
/// read. Donor-shape single-pass build: spread symbols into the
/// scratch buffer, then write `decode` entries in one linear pass
/// — no intermediate zero-init Vec::resize, no per-call heap
/// allocation for the symbol counter (stack-allocated since the
/// max symbol count is bounded by `u8::MAX + 1 = 256`).
///
/// Wraps `build_decoding_table_inner` so the `symbol_spread_buffer`
/// scratch is unconditionally restored to `self` on every exit
/// path (success OR error) — otherwise an early `Err` from the
/// inner pass would drop the taken buffer and force a fresh
/// allocation on the next build.
///
/// On `Err` the table is also fully `reset()` after the buffer
/// restore: the inner pass mutates `self.decode` (partial push)
/// while `self.accuracy_log` / `self.symbol_probabilities` were
/// already set by the caller (`build_from_probabilities`); leaving
/// that inconsistency in place would let a subsequent `init_state`
/// pass the `accuracy_log != 0` gate and read from a partial
/// `decode` vec — UB. After `reset()` the table is in the same
/// well-defined empty state a freshly-constructed `FSETable` has;
/// any subsequent `init_state` returns `TableIsUninitialized`.
fn build_decoding_table(&mut self) -> Result<(), FSETableError> {
let mut spread = core::mem::take(&mut self.symbol_spread_buffer);
let result = self.build_decoding_table_inner(&mut spread);
self.symbol_spread_buffer = spread;
if result.is_err() {
self.reset();
}
result
}
fn build_decoding_table_inner(&mut self, spread: &mut Vec<u8>) -> Result<(), FSETableError> {
let nb_symbols = self.symbol_probabilities.len();
if nb_symbols > self.max_symbol as usize + 1 {
return Err(FSETableError::TooManySymbols { got: nb_symbols });
}
let table_size = 1 << self.accuracy_log;
// === Spread step ===
// Reuse the persistent scratch buffer; clear then resize to
// table_size. The resize is the ONLY zero-init in the entire
// build path now (previous impl also zero-init'd `decode`
// through `Vec::resize` with a default `Entry`, doubling the
// write traffic on the build path).
spread.clear();
spread.resize(table_size, 0);
// Donor `ZSTD_buildFSETable_body`-shape per-symbol counter
// (donor `symbolNext[]`). Indexed by symbol byte; initialised
// to `prob` for positive-probability symbols, `1` for `-1`
// (low-probability) symbols. The build loop below reads then
// increments `symbol_next[symbol]` per state placed, and
// derives `(num_bits, new_state)` from the running counter
// via pure shifts / `highest_bit_set` — NO division. This
// replaces the previous `calc_baseline_and_numbits` helper
// whose `num_states_total / num_state_slices` lowered to a
// runtime `divl` (~24 cycles, ~24% of FSE-build samples on
// z000033 L-5 fast — see perf annotate audit). Stack-only
// (256-symbol alphabet bounded by `u8`); zero-init covers
// every slot.
let mut symbol_next = [0u32; 256];
// Pass 1: place -1 probability symbols at the top of the
// table AND initialise `symbol_next` for them. Donor:
// `tableDecode[highThreshold--].baseValue = s; symbolNext[s]
// = 1;`.
let mut negative_idx = table_size;
for (symbol, &prob) in self
.symbol_probabilities
.iter()
.enumerate()
.take(nb_symbols)
{
if prob == -1 {
negative_idx -= 1;
spread[negative_idx] = symbol as u8;
symbol_next[symbol] = 1;
}
}
// Pass 2: distribute positive-probability symbols across the
// [0, negative_idx) range using the donor's `next_position`
// walker, and initialise `symbol_next[s] = prob` so the
// build loop's counter reaches `2*prob - 1` over `prob`
// iterations (matching donor `symbolNext[s]++` semantics).
let mut position = 0usize;
for (symbol, &prob) in self
.symbol_probabilities
.iter()
.enumerate()
.take(nb_symbols)
{
if prob <= 0 {
continue;
}
symbol_next[symbol] = prob as u32;
let symbol_u8 = symbol as u8;
for _ in 0..prob {
spread[position] = symbol_u8;
position = next_position(position, table_size);
while position >= negative_idx {
position = next_position(position, table_size);
}
}
}
// === Build step (donor formula) ===
// For each state u in 0..tableSize, donor `ZSTD_buildFSETable_body`:
// nextState = symbolNext[symbol]++
// nbBits = tableLog - ZSTD_highbit32(nextState)
// newState = (nextState << nbBits) - tableSize
//
// Identity with our previous `calc_baseline_and_numbits` was
// verified algebraically: for a symbol with positive prob N,
// `nextState` walks N..2N-1; `highest_bit_set(nextState)`
// partitions this range into "double-width" (high_bit ==
// ceil(log2(N))) and "single-width" (high_bit ==
// ceil(log2(N))+1) slices, matching the
// `num_double_width_state_slices` / `num_single_width...`
// split in the old code. For -1 prob, `symbol_next` is `1`,
// so `nextState == 1`, `high_bit == 1`, `nbBits ==
// accuracy_log`, `newState == 0` — exactly the -1 entry
// shape.
let accuracy_log = self.accuracy_log;
let table_size_u32 = table_size as u32;
self.decode.clear();
self.decode.reserve(table_size);
for &symbol in spread.iter().take(table_size) {
let next_state = symbol_next[symbol as usize];
// `next_state >= 1` by construction: upstream
// `read_probabilities` / `build_from_probabilities`
// validate that `sum(prob, treating -1 as 1) ==
// table_size`, which guarantees Pass 1 + Pass 2 above
// fully cover spread[] (no zero defaults survive
// `spread.resize(table_size, 0)`) and every symbol that
// appears in spread[] has `symbol_next[s] > 0`.
// `highest_bit_set(x)` returns `floor(log2(x)) + 1`.
symbol_next[symbol as usize] = next_state + 1;
let high_bit = highest_bit_set(next_state);
// nbBits = accuracy_log - floor(log2(next_state))
// = accuracy_log - (high_bit - 1)
// = (accuracy_log + 1) - high_bit
// For -1 prob: next_state = 1, high_bit = 1, nbBits =
// accuracy_log.
// For positive prob N: next_state in N..2N-1; max
// next_state = 2N-1 ≤ 2*table_size - 1, max high_bit
// = accuracy_log + 1 (when N == table_size), nbBits = 0.
// So nbBits ∈ [0, accuracy_log], satisfying the
// unchecked-read invariant in `FSEDecoder::read_entry`.
let nb = (accuracy_log as u32 + 1).wrapping_sub(high_bit) as u8;
// FSE invariant gate: keep the explicit `nb > accuracy_log`
// guard for `build_from_probabilities` (public surface) so
// a crafted probability vector can't silently violate
// `new_state + (1 << nb) - 1 < table_size`. With the donor
// formula `nb` derives from `high_bit` which is itself
// bounded by the table-size invariant, but a malformed
// probability accumulating beyond `table_size` could push
// high_bit > accuracy_log + 1 and wrap `nb` to a large
// u8. Reject so the unchecked indexing contract holds.
if nb > accuracy_log {
return Err(FSETableError::TableInvariantViolation {
prob: self.symbol_probabilities[symbol as usize],
symbol,
num_bits: nb,
accuracy_log,
});
}
// `next_state << nb` ranges [table_size, 2*table_size - (1 << nb)];
// subtracting `table_size` gives `new_state ∈ [0, table_size - 1]`
// which fits u16 for any `accuracy_log <= 16`
// (`ENTRY_MAX_ACCURACY_LOG`). The wire format caps
// `accuracy_log` at `FSE_MAX_TABLELOG = 9` for sequence
// tables, well below the u16 bound. Use normal
// subtraction (not wrapping_sub) so the
// implicit-overflow debug_assert catches any future
// formula bug instead of silently producing a
// malformed entry.
let new_state_u32 = (next_state << nb) - table_size_u32;
self.decode.push(Entry {
new_state: new_state_u32 as u16,
symbol,
num_bits: nb,
base_value: 0,
num_additional_bits: 0,
});
}
Ok(())
}
/// Read the accuracy log and the probability table from the source and return the number of bytes
/// read. If the size of the table is larger than the provided `max_log`, return an error.
fn read_probabilities(&mut self, source: &[u8], max_log: u8) -> Result<usize, FSETableError> {
self.symbol_probabilities.clear(); //just clear, we will fill a probability for each entry anyways. No need to force new allocs here
let mut br = BitReader::new(source);
self.accuracy_log = ACC_LOG_OFFSET + (br.get_bits(4)? as u8);
if self.accuracy_log > ENTRY_MAX_ACCURACY_LOG {
return Err(FSETableError::AccLogTooBig {
got: self.accuracy_log,
max: ENTRY_MAX_ACCURACY_LOG,
});
}
if self.accuracy_log > max_log {
return Err(FSETableError::AccLogTooBig {
got: self.accuracy_log,
max: max_log,
});
}
if self.accuracy_log == 0 {
return Err(FSETableError::AccLogIsZero);
}
let probability_sum = 1 << self.accuracy_log;
let mut probability_counter = 0;
while probability_counter < probability_sum {
let max_remaining_value = probability_sum - probability_counter + 1;
let bits_to_read = highest_bit_set(max_remaining_value);
let unchecked_value = br.get_bits(bits_to_read as usize)? as u32;
let low_threshold = ((1 << bits_to_read) - 1) - (max_remaining_value);
let mask = (1 << (bits_to_read - 1)) - 1;
let small_value = unchecked_value & mask;
let value = if small_value < low_threshold {
br.return_bits(1);
small_value
} else if unchecked_value > mask {
unchecked_value - low_threshold
} else {
unchecked_value
};
//println!("{}, {}, {}", self.symbol_probablilities.len(), unchecked_value, value);
let prob = (value as i32) - 1;
self.symbol_probabilities.push(prob);
if prob != 0 {
if prob > 0 {
probability_counter += prob as u32;
} else {
// probability -1 counts as 1
assert!(prob == -1);
probability_counter += 1;
}
} else {
//fast skip further zero probabilities
loop {
let skip_amount = br.get_bits(2)? as usize;
self.symbol_probabilities
.resize(self.symbol_probabilities.len() + skip_amount, 0);
if skip_amount != 3 {
break;
}
}
}
}
if probability_counter != probability_sum {
return Err(FSETableError::ProbabilityCounterMismatch {
got: probability_counter,
expected_sum: probability_sum,
symbol_probabilities: self.symbol_probabilities.clone(),
});
}
if self.symbol_probabilities.len() > self.max_symbol as usize + 1 {
return Err(FSETableError::TooManySymbols {
got: self.symbol_probabilities.len(),
});
}
let bytes_read = if br.bits_read().is_multiple_of(8) {
br.bits_read() / 8
} else {
(br.bits_read() / 8) + 1
};
Ok(bytes_read)
}
}
/// A single entry in an FSE table.
///
/// The first four bytes (`new_state`, `symbol`, `num_bits`) mirror the
/// classical donor `FSE_decode_t` layout used by every FSE-backed
/// decoder in the crate (sequence-section LL/ML/OF, HUF-weight
/// stream). The trailing `base_value` + `num_additional_bits`
/// fields are populated only by the LL / ML / OF tables in the
/// sequence-section decoder (donor `ZSTD_seqSymbol` shape) so the
/// per-sequence hot path can read them directly off the active
/// entry instead of issuing a second lookup into a separate
/// metadata table. HUF tables leave these two fields at their
/// default zero — the extra eight bytes per slot are a fixed cost
/// on the small HUF FSE table (≤ 64 entries) and the dominant
/// savings live in the sequence section.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct Entry {
/// Base index for the next state. The low bits read from the bitstream are
/// added to this value to produce the final state index.
pub new_state: u16,
/// The byte that should be put in the decode output when encountering this state.
pub symbol: u8,
/// How many bits should be read from the stream when decoding this entry.
pub num_bits: u8,
/// For LL / ML / OF tables: pre-computed code baseline.
/// `actual_value = base_value + extra_bits_read`. Donor
/// `ZSTD_seqSymbol::baseValue`. Populated by the per-table
/// `enrich_for_*` post-build pass; stays 0 for FSE tables that
/// don't need it (HUF-weight stream).
pub base_value: u32,
/// For LL / ML / OF tables: number of bits to read from the
/// bitstream after the symbol has been decoded, to obtain the
/// additional value to add to `base_value`. Donor
/// `ZSTD_seqSymbol::nbAdditionalBits`. Populated alongside
/// `base_value`; stays 0 for FSE tables that don't need it.
pub num_additional_bits: u8,
}
#[cfg(target_endian = "little")]
const _: [(); 0] = [(); core::mem::offset_of!(Entry, new_state)];
#[cfg(target_endian = "little")]
const _: [(); 2] = [(); core::mem::offset_of!(Entry, symbol)];
#[cfg(target_endian = "little")]
const _: [(); 3] = [(); core::mem::offset_of!(Entry, num_bits)];
#[cfg(target_endian = "little")]
const _: [(); 4] = [(); core::mem::offset_of!(Entry, base_value)];
#[cfg(target_endian = "little")]
const _: [(); 8] = [(); core::mem::offset_of!(Entry, num_additional_bits)];
// 12 bytes: 4 (header) + 4 (base_value) + 1 (num_additional_bits)
// + 3 bytes tail padding for natural u32 alignment.
#[cfg(target_endian = "little")]
const _: [(); 12] = [(); core::mem::size_of::<Entry>()];
/// This value is added to the first 4 bits of the stream to determine the
/// `Accuracy_Log`
const ACC_LOG_OFFSET: u8 = 5;
const ENTRY_MAX_ACCURACY_LOG: u8 = 16;
fn highest_bit_set(x: u32) -> u32 {
assert!(x > 0);
u32::BITS - x.leading_zeros()
}
//utility functions for building the decoding table from probabilities
/// Calculate the position of the next entry of the table given the current
/// position and size of the table.
fn next_position(mut p: usize, table_size: usize) -> usize {
p += (table_size >> 1) + (table_size >> 3) + 3;
p &= table_size - 1;
p
}
#[cfg(test)]
mod tests {
use super::*;
use crate::decoding::errors::FSETableError;
#[test]
fn build_from_probabilities_rejects_sum_too_small() {
// acc_log=4 → table_size=16. Valid sum (treating -1 as 1)
// is 16. Use a distribution that sums to 8 (probs sum 8,
// no -1s).
let mut t = FSETable::new(8);
let probs: [i32; 4] = [4, 2, 1, 1];
let result = t.build_from_probabilities(4, &probs);
assert!(
matches!(
result,
Err(FSETableError::ProbabilityCounterMismatch { .. })
),
"expected ProbabilityCounterMismatch for sum=8 vs expected=16, got {result:?}",
);
}
#[test]
fn build_from_probabilities_rejects_sum_too_large() {
// acc_log=4 → table_size=16. Probs summing to 20 (>16)
// would over-fill the spread.
let mut t = FSETable::new(8);
let probs: [i32; 4] = [8, 6, 4, 2];
let result = t.build_from_probabilities(4, &probs);
assert!(
matches!(
result,
Err(FSETableError::ProbabilityCounterMismatch { .. })
),
"expected ProbabilityCounterMismatch for sum=20 vs expected=16, got {result:?}",
);
}
#[test]
fn build_from_probabilities_accepts_negative_one_in_sum() {
// acc_log=4 → table_size=16. Probs: 12 positive + 4 × -1
// (counted as 1 each) sum to 16. Should succeed.
let mut t = FSETable::new(8);
let probs: [i32; 6] = [8, 4, -1, -1, -1, -1];
let result = t.build_from_probabilities(4, &probs);
assert!(
result.is_ok(),
"expected Ok for sum=16 with -1s, got {result:?}"
);
}
#[test]
fn build_from_probabilities_rejects_overflow_in_sum() {
// DoS-shaped exploit: two i32::MAX values + 0x42 cast to u32
// and summed wrapping wraps to 0x40 = 64 = 1 << acc_log. The
// pre-fix u32 sum check would accept the input, then
// build_decoding_table would run `for _ in 0..prob` against
// prob = i32::MAX, looping 2^31-1 times per such symbol.
let mut t = FSETable::new(8);
let probs: [i32; 3] = [i32::MAX, i32::MAX, 0x42];
let result = t.build_from_probabilities(6, &probs);
assert!(
matches!(result, Err(FSETableError::InvalidProbability { .. })),
"expected InvalidProbability for overflow-shaped exploit, got {result:?}",
);
}
#[test]
fn build_from_probabilities_rejects_negative_below_minus_one() {
// RFC 8878 §4.1.1: probability values are in {-1, 0, positive}.
// p = -2 is outside the spec; clamping to 0 silently is wrong.
let mut t = FSETable::new(8);
let probs: [i32; 4] = [-2, 8, 4, 4];
let result = t.build_from_probabilities(4, &probs);
assert!(
matches!(
result,
Err(FSETableError::InvalidProbability { value: -2, .. })
),
"expected InvalidProbability{{value: -2, ..}}, got {result:?}",
);
}
#[test]
fn build_from_probabilities_accepts_exact_sum() {
// Sum 4+4+4+4 = 16 = 1 << 4. No -1s.
let mut t = FSETable::new(8);
let probs: [i32; 4] = [4, 4, 4, 4];
let result = t.build_from_probabilities(4, &probs);
assert!(result.is_ok(), "expected Ok for exact sum, got {result:?}");
}
}