Skip to main content

haagenti_zstd/compress/
sequences.rs

1//! Sequence Encoding - Novel RLE-first approach.
2//!
3//! Traditional Zstd implementations jump straight to FSE for sequence encoding.
4//! This module takes a different approach: leverage RLE mode for uniform match
5//! patterns, which is simpler, faster, and often just as effective.
6//!
7//! ## RLE Sequence Mode
8//!
9//! When all sequences share similar characteristics (same offset code, similar
10//! lengths), RLE mode encodes just one symbol per stream (LL, OF, ML) instead
11//! of building FSE tables. The bitstream only contains extra bits.
12//!
13//! ## FSE Sequence Mode
14//!
15//! For non-uniform patterns, we use FSE encoding with predefined tables.
16//! This ensures compatibility while still providing good compression.
17//!
18//! ## Benefits
19//!
20//! - RLE mode for uniform patterns (simple, fast, no table overhead)
21//! - Predefined FSE tables for varied patterns (reliable, good compression)
22//! - Long matches preserved (not split unnecessarily)
23
24use crate::block::{Sequence, LITERAL_LENGTH_BASELINE};
25use crate::fse::{
26    cached_ll_table, cached_ml_table, cached_of_table, FseBitWriter, InterleavedTansEncoder,
27    TansEncoder,
28};
29use crate::CustomFseTables;
30use haagenti_core::Result;
31
/// Match length encoding table derived from zstd's predefined ML table.
///
/// Each entry is `(extra_bits, baseline, max_value)` for ML codes 0-52, so
/// a match length `v` with `baseline <= v <= max_value` encodes as that code
/// plus `v - baseline` in `extra_bits` bits.
///
/// These values come from the unique (seq_base, seq_extra_bits) pairs in
/// ML_PREDEFINED_TABLE_DIRECT, ensuring compatibility with reference zstd.
///
/// IMPORTANT: zstd's predefined table uses DIFFERENT values than RFC 8878
/// starting at code 43. The encoder MUST use these values to match what
/// the decoder expects.
///
/// Key differences from RFC:
/// - Codes match zstd's ML_defaultDTable reference implementation
/// - Code 42: 5 bits, baseline 99 (covering 99-130)
/// - Code 43: 7 bits, baseline 131 (covering 131-258)
/// - Code 44: 8 bits, baseline 259 (covering 259-514)
/// - Code 45+: Continue with progressively larger ranges
const ML_ENCODE_TABLE: [(u8, u32, u32); 53] = [
    // Codes 0-31: match_length 3-34, no extra bits (identity + 3 offset)
    (0, 3, 3),
    (0, 4, 4),
    (0, 5, 5),
    (0, 6, 6),
    (0, 7, 7),
    (0, 8, 8),
    (0, 9, 9),
    (0, 10, 10),
    (0, 11, 11),
    (0, 12, 12),
    (0, 13, 13),
    (0, 14, 14),
    (0, 15, 15),
    (0, 16, 16),
    (0, 17, 17),
    (0, 18, 18),
    (0, 19, 19),
    (0, 20, 20),
    (0, 21, 21),
    (0, 22, 22),
    (0, 23, 23),
    (0, 24, 24),
    (0, 25, 25),
    (0, 26, 26),
    (0, 27, 27),
    (0, 28, 28),
    (0, 29, 29),
    (0, 30, 30),
    (0, 31, 31),
    (0, 32, 32),
    (0, 33, 33),
    (0, 34, 34),
    // Codes 32-35: 1 extra bit each (lengths 35-42)
    (1, 35, 36),
    (1, 37, 38),
    (1, 39, 40),
    (1, 41, 42),
    // Codes 36-37: 2 extra bits each (lengths 43-50)
    (2, 43, 46),
    (2, 47, 50),
    // Codes 38-39: 3 extra bits each (lengths 51-66)
    (3, 51, 58),
    (3, 59, 66),
    // Codes 40-41: 4 extra bits each (lengths 67-98)
    (4, 67, 82),
    (4, 83, 98),
    // Code 42: 5 extra bits (from zstd: baseline 99)
    (5, 99, 130),
    // Code 43: 7 extra bits (from zstd: baseline 131) -- diverges from RFC here
    (7, 131, 258),
    // Code 44: 8 extra bits (from zstd: baseline 259)
    (8, 259, 514),
    // Code 45: 9 extra bits (from zstd: baseline 515)
    (9, 515, 1026),
    // Code 46: 10 extra bits (from zstd: baseline 1027)
    (10, 1027, 2050),
    // Code 47: 11 extra bits (from zstd: baseline 2051)
    (11, 2051, 4098),
    // Code 48: 12 extra bits (from zstd: baseline 4099)
    (12, 4099, 8194),
    // Code 49: 13 extra bits (from zstd: baseline 8195)
    (13, 8195, 16386),
    // Code 50: 14 extra bits (from zstd: baseline 16387)
    (14, 16387, 32770),
    // Code 51: 15 extra bits (from zstd: baseline 32771)
    (15, 32771, 65538),
    // Code 52: 16 extra bits (from zstd: baseline 65539)
    (16, 65539, 131074),
];
119
/// Encoded sequence codes with extra bits.
///
/// One sequence split into its three streams (literal length, offset,
/// match length), each reduced to a table code plus the extra bits the
/// bitstream must carry.
#[derive(Debug, Clone, Copy)]
pub struct EncodedSequence {
    /// Literal length code (0-35)
    pub ll_code: u8,
    /// Literal length extra bits (value - baseline)
    pub ll_extra: u32,
    /// Literal length extra bit count
    pub ll_bits: u8,
    /// Offset code (0-31)
    pub of_code: u8,
    /// Offset extra bits (offset_value minus 1 << of_code)
    pub of_extra: u32,
    /// Offset extra bit count (per RFC 8878 Table 15)
    pub of_bits: u8,
    /// Match length code (0-52)
    pub ml_code: u8,
    /// Match length extra bits (value - baseline)
    pub ml_extra: u32,
    /// Match length extra bit count
    pub ml_bits: u8,
}
142
143impl EncodedSequence {
144    /// Encode a sequence into codes and extra bits.
145    #[inline]
146    pub fn from_sequence(seq: &Sequence) -> Self {
147        let (ll_code, ll_extra, ll_bits) = encode_literal_length(seq.literal_length);
148        let (of_code, of_extra, of_bits) = encode_offset(seq.offset);
149        let (ml_code, ml_extra, ml_bits) = encode_match_length(seq.match_length);
150
151        Self {
152            ll_code,
153            ll_extra,
154            ll_bits,
155            of_code,
156            of_extra,
157            of_bits,
158            ml_code,
159            ml_extra,
160            ml_bits,
161        }
162    }
163}
164
165/// Encode literal length to code + extra bits.
166///
167/// Optimized with direct lookup for small values and binary-like search for larger.
168#[inline(always)]
169fn encode_literal_length(value: u32) -> (u8, u32, u8) {
170    // Fast path: values 0-15 map directly to codes 0-15 with no extra bits
171    if value < 16 {
172        return (value as u8, 0, 0);
173    }
174
175    // Values 16-17: code 16, 1 extra bit
176    if value < 18 {
177        return (16, value - 16, 1);
178    }
179
180    // Values 18-19: code 17, 1 extra bit
181    if value < 20 {
182        return (17, value - 18, 1);
183    }
184
185    // Values 20-23: code 18, 2 extra bits
186    if value < 24 {
187        return (18, value - 20, 2);
188    }
189
190    // For larger values, use the table but start from a good estimate
191    // The codes follow a pattern where each code covers 2^extra_bits values
192    let log_estimate = if value < 64 {
193        4
194    } else if value < 256 {
195        6
196    } else if value < 1024 {
197        8
198    } else {
199        10
200    };
201
202    // Search from the estimated position
203    for (code, &(bits, baseline)) in LITERAL_LENGTH_BASELINE
204        .iter()
205        .enumerate()
206        .skip(log_estimate)
207    {
208        let max_value = if bits == 0 {
209            baseline
210        } else {
211            baseline + ((1u32 << bits) - 1)
212        };
213
214        if value >= baseline && value <= max_value {
215            return (code as u8, value - baseline, bits);
216        }
217    }
218
219    // Fallback to last code for very large values
220    let last_idx = LITERAL_LENGTH_BASELINE.len() - 1;
221    let (bits, baseline) = LITERAL_LENGTH_BASELINE[last_idx];
222    (last_idx as u8, value.saturating_sub(baseline), bits)
223}
224
225/// Encode match length to code + extra bits.
226///
227/// Uses ML_ENCODE_TABLE which contains zstd's predefined values.
228/// This ensures the encoder writes the same number of extra bits
229/// that the decoder expects to read.
230///
231/// Optimized with direct lookup for common match lengths.
232#[inline(always)]
233fn encode_match_length(value: u32) -> (u8, u32, u8) {
234    // Fast path: values 3-34 map directly to codes 0-31 with no extra bits
235    // This covers the vast majority of match lengths
236    if (3..=34).contains(&value) {
237        return ((value - 3) as u8, 0, 0);
238    }
239
240    // Match length must be at least 3
241    if value < 3 {
242        return (0, 0, 0); // Treat as minimum match
243    }
244
245    // Values 35-42: codes 32-35, 1 extra bit each
246    if value <= 42 {
247        let code = 32 + ((value - 35) / 2) as u8;
248        let baseline = 35 + ((code - 32) as u32 * 2);
249        return (code, value - baseline, 1);
250    }
251
252    // Values 43-50: codes 36-37, 2 extra bits each
253    if value <= 50 {
254        let code = if value < 47 { 36 } else { 37 };
255        let baseline = if code == 36 { 43 } else { 47 };
256        return (code, value - baseline, 2);
257    }
258
259    // Values 51-66: codes 38-39, 3 extra bits each
260    if value <= 66 {
261        let code = if value < 59 { 38 } else { 39 };
262        let baseline = if code == 38 { 51 } else { 59 };
263        return (code, value - baseline, 3);
264    }
265
266    // For larger values (67+), search in ML_ENCODE_TABLE starting from code 40
267    // This ensures we use the exact same baselines as the decoder
268    for (code, &(bits, baseline, max_value)) in ML_ENCODE_TABLE.iter().enumerate().skip(40) {
269        if value >= baseline && value <= max_value {
270            return (code as u8, value - baseline, bits);
271        }
272    }
273
274    // Fallback to last code for very large values
275    let last_idx = ML_ENCODE_TABLE.len() - 1;
276    let (bits, baseline, _) = ML_ENCODE_TABLE[last_idx];
277    (last_idx as u8, value.saturating_sub(baseline), bits)
278}
279
/// Encode an offset_value to `(offset_code, extra_bits_value, extra_bit_count)`.
///
/// Per RFC 8878 Table 7 the decoder computes:
///   Offset_Value = (1 << Offset_Code) + Extra_Bits
///
/// so encoding is the inverse:
///   Offset_Code    = floor(log2(Offset_Value))
///   Extra_Bits     = Offset_Value - (1 << Offset_Code)
///   Number_of_Bits = Offset_Code
///
/// The offset stored in `Sequence` is already the offset_value:
/// - 1-3: repeat offset references (handled by RepeatOffsets::resolve)
/// - >= 4: actual_offset + 3
fn encode_offset(offset_value: u32) -> (u8, u32, u8) {
    // Zero has no log2; emit the degenerate code (input is invalid anyway).
    if offset_value == 0 {
        return (0, 0, 0);
    }

    // floor(log2(v)) == index of the highest set bit.
    let code = u32::BITS - 1 - offset_value.leading_zeros();
    let extra = offset_value - (1u32 << code);

    (code as u8, extra, code as u8)
}
306
307/// Check if sequences are suitable for RLE mode.
308///
309/// Returns (uniform_ll, uniform_of, uniform_ml) indicating which
310/// streams can use RLE mode.
311///
312/// Optimized to encode and check uniformity in a single pass.
313pub fn analyze_for_rle(sequences: &[Sequence]) -> RleSuitability {
314    if sequences.is_empty() {
315        return RleSuitability::all_rle(0, 0, 0);
316    }
317
318    // Pre-allocate with exact capacity
319    let mut encoded = Vec::with_capacity(sequences.len());
320
321    // Encode first sequence to get reference codes
322    let first = EncodedSequence::from_sequence(&sequences[0]);
323    let (ll_code, of_code, ml_code) = (first.ll_code, first.of_code, first.ml_code);
324    encoded.push(first);
325
326    // Track uniformity while encoding (single pass)
327    let mut ll_uniform = true;
328    let mut of_uniform = true;
329    let mut ml_uniform = true;
330
331    for seq in sequences.iter().skip(1) {
332        let enc = EncodedSequence::from_sequence(seq);
333
334        // Check uniformity inline
335        ll_uniform = ll_uniform && enc.ll_code == ll_code;
336        of_uniform = of_uniform && enc.of_code == of_code;
337        ml_uniform = ml_uniform && enc.ml_code == ml_code;
338
339        encoded.push(enc);
340    }
341
342    RleSuitability {
343        ll_uniform,
344        ll_code,
345        of_uniform,
346        of_code,
347        ml_uniform,
348        ml_code,
349        encoded,
350    }
351}
352
/// RLE suitability analysis result.
///
/// Produced by [`analyze_for_rle`]; carries both the per-stream uniformity
/// verdicts and the already-encoded sequences so they are not re-encoded.
#[derive(Debug)]
pub struct RleSuitability {
    /// Whether literal lengths are uniform (same code)
    pub ll_uniform: bool,
    /// The LL code to use for RLE (first sequence's code)
    pub ll_code: u8,
    /// Whether offsets are uniform
    pub of_uniform: bool,
    /// The OF code to use for RLE (first sequence's code)
    pub of_code: u8,
    /// Whether match lengths are uniform
    pub ml_uniform: bool,
    /// The ML code to use for RLE (first sequence's code)
    pub ml_code: u8,
    /// Pre-encoded sequences (one entry per input sequence)
    pub encoded: Vec<EncodedSequence>,
}
371
372impl RleSuitability {
373    fn all_rle(ll: u8, of: u8, ml: u8) -> Self {
374        Self {
375            ll_uniform: true,
376            ll_code: ll,
377            of_uniform: true,
378            of_code: of,
379            ml_uniform: true,
380            ml_code: ml,
381            encoded: Vec::new(),
382        }
383    }
384
385    /// Check if all three streams can use RLE mode.
386    pub fn all_uniform(&self) -> bool {
387        self.ll_uniform && self.of_uniform && self.ml_uniform
388    }
389}
390
391/// Encode sequences using RLE mode.
392///
393/// This is simpler than FSE and works well when matches are uniform.
394/// Mode byte uses RLE (01) for each stream: 0b01_01_01_00 = 0x54
395pub fn encode_sequences_rle(
396    sequences: &[Sequence],
397    suitability: &RleSuitability,
398    output: &mut Vec<u8>,
399) -> Result<()> {
400    if sequences.is_empty() {
401        output.push(0);
402        return Ok(());
403    }
404
405    let count = sequences.len();
406
407    // Encode sequence count
408    if count < 128 {
409        output.push(count as u8);
410    } else if count < 0x7F00 {
411        output.push(((count >> 8) + 128) as u8);
412        output.push((count & 0xFF) as u8);
413    } else {
414        output.push(255);
415        let adjusted = count - 0x7F00;
416        output.push((adjusted & 0xFF) as u8);
417        output.push(((adjusted >> 8) & 0xFF) as u8);
418    }
419
420    // Mode byte: RLE for all three streams
421    // Per RFC 8878 Section 3.1.1.4:
422    //   bits[1:0] = LL mode
423    //   bits[3:2] = OF mode
424    //   bits[5:4] = ML mode
425    //   bits[7:6] = Reserved (must be 0)
426    // RLE mode = 1, so all three at 01:
427    //   0b00_01_01_01 = 0x15
428    output.push(0x15);
429
430    // RLE symbols
431    output.push(suitability.ll_code);
432    output.push(suitability.of_code);
433    output.push(suitability.ml_code);
434
435    // Build bitstream with extra bits
436    let bitstream = build_rle_bitstream(&suitability.encoded);
437    output.extend_from_slice(&bitstream);
438
439    Ok(())
440}
441
442/// Encode sequences using predefined FSE tables.
443///
444/// Uses Zstd's predefined FSE tables for LL, OF, and ML codes.
445/// The bitstream format follows RFC 8878:
446/// - Sequence count (variable length)
447/// - Mode byte (0x00 for predefined tables)
448/// - Compressed bitstream with FSE symbols and extra bits
449///
450/// # Performance
451///
452/// Uses statically cached FSE tables to avoid rebuilding on every block.
453/// The tables are built once on first access and reused thereafter.
454pub fn encode_sequences_fse(sequences: &[Sequence], output: &mut Vec<u8>) -> Result<()> {
455    if sequences.is_empty() {
456        output.push(0);
457        return Ok(());
458    }
459
460    // Encode sequences to get codes and extra bits
461    let encoded: Vec<EncodedSequence> = sequences
462        .iter()
463        .map(EncodedSequence::from_sequence)
464        .collect();
465
466    encode_sequences_fse_with_encoded(&encoded, output)
467}
468
469/// Encode pre-encoded sequences using predefined FSE tables.
470///
471/// This is the fast path when sequences have already been encoded (e.g., from
472/// `analyze_for_rle`), avoiding redundant encoding work.
473///
474/// # Performance
475///
476/// Uses statically cached FSE tables to avoid rebuilding on every block.
477/// By accepting pre-encoded sequences, this eliminates double-encoding overhead.
478pub fn encode_sequences_fse_with_encoded(
479    encoded: &[EncodedSequence],
480    output: &mut Vec<u8>,
481) -> Result<()> {
482    if encoded.is_empty() {
483        output.push(0);
484        return Ok(());
485    }
486
487    let count = encoded.len();
488
489    // Encode sequence count
490    if count < 128 {
491        output.push(count as u8);
492    } else if count < 0x7F00 {
493        output.push(((count >> 8) + 128) as u8);
494        output.push((count & 0xFF) as u8);
495    } else {
496        output.push(255);
497        let adjusted = count - 0x7F00;
498        output.push((adjusted & 0xFF) as u8);
499        output.push(((adjusted >> 8) & 0xFF) as u8);
500    }
501
502    // Mode byte: Predefined tables for all three streams
503    // LL[7:6]=00, OF[5:4]=00, ML[3:2]=00, reserved[1:0]=00
504    output.push(0x00);
505
506    // Use CACHED predefined tANS encoders (built once, cloned per block).
507    // Cloning is ~100x faster than building from FSE tables.
508    let mut tans = InterleavedTansEncoder::new_predefined();
509
510    // Build bitstream
511    let bitstream = build_fse_bitstream(encoded, &mut tans);
512    output.extend_from_slice(&bitstream);
513
514    Ok(())
515}
516
/// Encode sequences using custom FSE tables.
///
/// Uses the provided custom tables where available, falling back to predefined
/// tables for any that are `None`. Custom tables can improve compression when
/// the data has symbol distributions that differ from the predefined tables.
///
/// # Mode Byte
///
/// When custom tables are used, the mode byte indicates "FSE_Compressed" mode (10)
/// for each stream, requiring the table to be serialized in the bitstream.
/// When predefined tables are used, mode is "Predefined" (00).
///
/// NOTE(review): as implemented, the mode byte is ALWAYS Predefined (0x00) —
/// see the TODO below. A decoder will therefore decode with its own
/// predefined tables; presumably any custom tables passed here must be
/// compatible with the predefined distributions or the streams would
/// desynchronize — confirm against the decoder before relying on this.
///
/// # Example
///
/// ```rust,ignore
/// let custom_tables = CustomFseTables::new()
///     .with_ll_table(my_ll_table);
/// encode_sequences_with_custom_tables(&encoded, &custom_tables, &mut output)?;
/// ```
pub fn encode_sequences_with_custom_tables(
    encoded: &[EncodedSequence],
    custom_tables: &CustomFseTables,
    output: &mut Vec<u8>,
) -> Result<()> {
    if encoded.is_empty() {
        // Empty section: single zero byte for Number_of_Sequences.
        output.push(0);
        return Ok(());
    }

    let count = encoded.len();

    // Number_of_Sequences, variable-length per RFC 8878 (same scheme as the
    // RLE and predefined-FSE paths).
    if count < 128 {
        output.push(count as u8);
    } else if count < 0x7F00 {
        output.push(((count >> 8) + 128) as u8);
        output.push((count & 0xFF) as u8);
    } else {
        output.push(255);
        let adjusted = count - 0x7F00;
        output.push((adjusted & 0xFF) as u8);
        output.push(((adjusted >> 8) & 0xFF) as u8);
    }

    // Determine mode for each stream:
    // - 00: Predefined (use built-in tables)
    // - 10: FSE_Compressed (custom table follows in bitstream)
    //
    // For now, we always use predefined mode, but build tANS encoders from
    // custom tables if provided. This gives us the encoding benefit without
    // needing to serialize custom tables (which adds overhead).
    //
    // TODO: Add FSE_Compressed mode support for full custom table functionality
    let mode_byte = 0x00; // Predefined for all streams
    output.push(mode_byte);

    // Resolve each stream's table: the caller's custom table if present,
    // otherwise the statically cached predefined table.
    let ll_table = custom_tables
        .ll_table
        .as_ref()
        .map(|t| t.as_ref())
        .unwrap_or_else(|| cached_ll_table());
    let of_table = custom_tables
        .of_table
        .as_ref()
        .map(|t| t.as_ref())
        .unwrap_or_else(|| cached_of_table());
    let ml_table = custom_tables
        .ml_table
        .as_ref()
        .map(|t| t.as_ref())
        .unwrap_or_else(|| cached_ml_table());

    // Build per-stream tANS encoders from the (decode-side) tables.
    let ll_encoder = TansEncoder::from_decode_table(ll_table);
    let of_encoder = TansEncoder::from_decode_table(of_table);
    let ml_encoder = TansEncoder::from_decode_table(ml_table);

    let mut tans = InterleavedTansEncoder::from_encoders(ll_encoder, of_encoder, ml_encoder);

    // Build bitstream
    let bitstream = build_fse_bitstream(encoded, &mut tans);
    output.extend_from_slice(&bitstream);

    Ok(())
}
602
603/// Build the bitstream for FSE mode.
604///
605/// Zstd sequence bitstream format (read backwards from end):
606/// 1. Initial states: LL, OF, ML (read first, written last)
607/// 2. For each sequence (processed forward, read via backward bits):
608///    - OF decode_symbol -> reads OF state bits
609///    - ML decode_symbol -> reads ML state bits
610///    - LL decode_symbol -> reads LL state bits
611///    - Read LL extra bits
612///    - Read ML extra bits
613///    - Read OF extra bits
614///
615/// For encoding (write order is reverse of read order):
616/// 1. For each sequence (reverse order):
617///    - OF extra bits
618///    - ML extra bits
619///    - LL extra bits
620///    - LL FSE state bits
621///    - ML FSE state bits
622///    - OF FSE state bits
623/// 2. ML initial state
624/// 3. OF initial state
625/// 4. LL initial state (written last, read first)
626#[allow(unused_variables)]
627fn build_fse_bitstream(encoded: &[EncodedSequence], tans: &mut InterleavedTansEncoder) -> Vec<u8> {
628    #[cfg(test)]
629    let debug = std::env::var("DEBUG_FSE").is_ok();
630    if encoded.is_empty() {
631        return vec![0x01]; // Minimal sentinel
632    }
633
634    let mut bits = FseBitWriter::new();
635
636    // Get accuracy logs
637    let (ll_log, of_log, ml_log) = tans.accuracy_logs();
638
639    // Zstd bitstream layout:
640    // - States at MSB end (read first, before LSB switch)
641    // - After LSB switch, decoder reads in FORWARD order (seq 0, 1, 2, ..., N-1):
642    //   - For each seq: extras (LL, ML, OF), then FSE update bits (LL, OF, ML) (skip for last)
643    //
644    // tANS encoding MUST process symbols in REVERSE order to produce correct FSE bits.
645    // But we need to write bits in FORWARD order for LSB-first reading.
646    //
647    // Solution: Pre-compute FSE bits in reverse order, store them, then write in forward order.
648
649    let last_idx = encoded.len() - 1;
650    let last_seq = &encoded[last_idx];
651
652    // Step 1: Initialize with the LAST sequence's symbols
653    tans.init_states(last_seq.ll_code, last_seq.of_code, last_seq.ml_code);
654
655    #[cfg(test)]
656    if std::env::var("DEBUG_FSE_DETAIL").is_ok() {
657        let (ll_s, of_s, ml_s) = tans.get_states();
658        eprintln!(
659            "FSE init with codes ({}, {}, {}): states = ({}, {}, {})",
660            last_seq.ll_code, last_seq.of_code, last_seq.ml_code, ll_s, of_s, ml_s
661        );
662    }
663
664    // Step 2: Encode FSE bits in REVERSE order (seq N-2 down to 0)
665    // We skip the last sequence because the decoder doesn't update states for it.
666    let mut fse_bits_per_seq: Vec<[(u32, u8); 3]> = vec![[(0, 0); 3]; last_idx];
667
668    for i in (0..last_idx).rev() {
669        let seq = &encoded[i];
670        let fse_bits = tans.encode_sequence(seq.ll_code, seq.of_code, seq.ml_code);
671
672        #[cfg(test)]
673        if std::env::var("DEBUG_FSE_DETAIL").is_ok() {
674            let (ll_s, of_s, ml_s) = tans.get_states();
675            eprintln!("FSE encode seq[{}] codes ({}, {}, {}): bits=LL({},{}) ML({},{}) OF({},{}), new states=({}, {}, {})",
676                      i, seq.ll_code, seq.of_code, seq.ml_code,
677                      fse_bits[0].0, fse_bits[0].1,
678                      fse_bits[2].0, fse_bits[2].1,
679                      fse_bits[1].0, fse_bits[1].1,
680                      ll_s, of_s, ml_s);
681        }
682
683        fse_bits_per_seq[i] = fse_bits;
684    }
685
686    // Step 3: Write in FORWARD order for LSB-first reading
687    // For each non-last sequence: write extras, then FSE update bits.
688    // Extra bits order: LL, ML, OF (matches decoder read order)
689    // FSE update bits order: LL, ML, OF (matches decoder state update order)
690    for i in 0..last_idx {
691        let seq = &encoded[i];
692
693        // Write extra bits in LL, ML, OF order
694        if seq.ll_bits > 0 {
695            bits.write_bits(seq.ll_extra, seq.ll_bits);
696        }
697        if seq.ml_bits > 0 {
698            bits.write_bits(seq.ml_extra, seq.ml_bits);
699        }
700        if seq.of_bits > 0 {
701            bits.write_bits(seq.of_extra, seq.of_bits);
702        }
703
704        // Write FSE update bits: LL, ML, OF order
705        let [ll_fse, of_fse, ml_fse] = fse_bits_per_seq[i];
706        bits.write_bits(ll_fse.0, ll_fse.1);
707        bits.write_bits(ml_fse.0, ml_fse.1);
708        bits.write_bits(of_fse.0, of_fse.1);
709    }
710
711    // Write LAST sequence's extra bits (no FSE update for last)
712    if last_seq.ll_bits > 0 {
713        bits.write_bits(last_seq.ll_extra, last_seq.ll_bits);
714    }
715    if last_seq.ml_bits > 0 {
716        bits.write_bits(last_seq.ml_extra, last_seq.ml_bits);
717    }
718    if last_seq.of_bits > 0 {
719        bits.write_bits(last_seq.of_extra, last_seq.of_bits);
720    }
721
722    // Get final encoder states (become initial decoder states)
723    let (ll_state, of_state, ml_state) = tans.get_states();
724
725    #[cfg(test)]
726    if std::env::var("DEBUG_FSE").is_ok() {
727        eprintln!("FSE encode: {} sequences", encoded.len());
728        eprintln!(
729            "  Last seq: ll_code={}, of_code={}, ml_code={}",
730            last_seq.ll_code, last_seq.of_code, last_seq.ml_code
731        );
732        eprintln!(
733            "  Last seq extras: ll={}({} bits), ml={}({} bits), of={}({} bits)",
734            last_seq.ll_extra,
735            last_seq.ll_bits,
736            last_seq.ml_extra,
737            last_seq.ml_bits,
738            last_seq.of_extra,
739            last_seq.of_bits
740        );
741        eprintln!(
742            "  Final states: ll={}, of={}, ml={}",
743            ll_state, of_state, ml_state
744        );
745    }
746
747    // Write states in ML, OF, LL order
748    // They end up at MSB end, decoder reads MSB-first (LL, OF, ML)
749    bits.write_bits(ml_state, ml_log);
750    bits.write_bits(of_state, of_log);
751    bits.write_bits(ll_state, ll_log);
752
753    bits.finish()
754}
755
756/// Build the bitstream for RLE mode.
757///
758/// In RLE mode, the bitstream only contains extra bits (no FSE state updates).
759/// Extra bits are laid out in LL, ML, OF order per sequence,
760/// sequences written in reverse order for backward reading.
761fn build_rle_bitstream(encoded: &[EncodedSequence]) -> Vec<u8> {
762    if encoded.is_empty() {
763        return vec![0x01]; // Minimal sentinel
764    }
765
766    let mut bits = FseBitWriter::new();
767
768    // Write sequences in reverse order (seq N-1, N-2, ..., 0)
769    // Extra bits within each sequence: LL, ML, OF
770    for seq in encoded.iter().rev() {
771        if seq.ll_bits > 0 {
772            bits.write_bits(seq.ll_extra, seq.ll_bits);
773        }
774        if seq.ml_bits > 0 {
775            bits.write_bits(seq.ml_extra, seq.ml_bits);
776        }
777        if seq.of_bits > 0 {
778            bits.write_bits(seq.of_extra, seq.of_bits);
779        }
780    }
781
782    bits.finish()
783}
784
785// =============================================================================
786// Tests
787// =============================================================================
788
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encode_literal_length_small() {
        // Codes 0-15 map directly to values 0-15 with no extra bits
        for i in 0..16 {
            let (code, extra, bits) = encode_literal_length(i);
            assert_eq!(code, i as u8);
            assert_eq!(extra, 0);
            assert_eq!(bits, 0);
        }
    }

    #[test]
    fn test_encode_literal_length_with_extra_bits() {
        // Code 16: 1 extra bit, baseline 16
        let (code, extra, bits) = encode_literal_length(16);
        assert_eq!(code, 16);
        assert_eq!(extra, 0);
        assert_eq!(bits, 1);

        let (code, extra, bits) = encode_literal_length(17);
        assert_eq!(code, 16);
        assert_eq!(extra, 1);
        assert_eq!(bits, 1);
    }

    #[test]
    fn test_encode_match_length() {
        // Code 0: baseline 3, no extra bits
        let (code, extra, bits) = encode_match_length(3);
        assert_eq!(code, 0);
        assert_eq!(extra, 0);
        assert_eq!(bits, 0);

        // Code 1: baseline 4
        let (code, extra, bits) = encode_match_length(4);
        assert_eq!(code, 1);
        assert_eq!(extra, 0);
        assert_eq!(bits, 0);
    }

    #[test]
    fn test_encode_offset() {
        // Per RFC 8878 Table 7:
        //   Offset_Value = (1 << Offset_Code) + Extra_Bits
        // Inverse:
        //   Offset_Code = floor(log2(Offset_Value))
        //   Extra_Bits = Offset_Value - (1 << Offset_Code)

        // offset_value 1 → code 0, extra 0
        let (code, extra, bits) = encode_offset(1);
        assert_eq!(code, 0);
        assert_eq!(extra, 0);
        assert_eq!(bits, 0);

        // offset_value 2 → code 1, extra 0
        let (code, extra, bits) = encode_offset(2);
        assert_eq!(code, 1);
        assert_eq!(extra, 0);
        assert_eq!(bits, 1);

        // offset_value 3 → code 1, extra 1
        let (code, extra, bits) = encode_offset(3);
        assert_eq!(code, 1);
        assert_eq!(extra, 1);
        assert_eq!(bits, 1);

        // offset_value 4 → code 2, extra 0
        let (code, extra, bits) = encode_offset(4);
        assert_eq!(code, 2);
        assert_eq!(extra, 0);
        assert_eq!(bits, 2);

        // offset_value 7 → code 2, extra 3
        let (code, extra, bits) = encode_offset(7);
        assert_eq!(code, 2);
        assert_eq!(extra, 3);
        assert_eq!(bits, 2);

        // offset_value 8 → code 3, extra 0
        let (code, extra, bits) = encode_offset(8);
        assert_eq!(code, 3);
        assert_eq!(extra, 0);
        assert_eq!(bits, 3);

        // offset_value 19 → code 4, extra 3 (19 = 16 + 3)
        let (code, extra, bits) = encode_offset(19);
        assert_eq!(code, 4);
        assert_eq!(extra, 3);
        assert_eq!(bits, 4);

        // offset_value 100 → code 6, extra 36 (100 = 64 + 36)
        let (code, extra, bits) = encode_offset(100);
        assert_eq!(code, 6);
        assert_eq!(extra, 36);
        assert_eq!(bits, 6);
    }

    #[test]
    fn test_analyze_for_rle_uniform() {
        let sequences = vec![
            Sequence::new(0, 4, 3), // LL=0, OF code for 4, ML=3
            Sequence::new(0, 4, 3),
            Sequence::new(0, 4, 3),
        ];

        let suitability = analyze_for_rle(&sequences);
        assert!(suitability.all_uniform());
    }

    #[test]
    fn test_analyze_for_rle_non_uniform() {
        let sequences = vec![
            Sequence::new(0, 4, 3),
            Sequence::new(10, 100, 20), // Different values
        ];

        let suitability = analyze_for_rle(&sequences);
        assert!(!suitability.all_uniform());
    }

    #[test]
    fn test_encode_sequences_rle_empty() {
        let sequences: Vec<Sequence> = vec![];
        let suitability = analyze_for_rle(&sequences);

        let mut output = Vec::new();
        encode_sequences_rle(&sequences, &suitability, &mut output).unwrap();

        assert_eq!(output, vec![0]); // Just count = 0
    }

    #[test]
    fn test_encode_sequences_rle_single() {
        let sequences = vec![Sequence::new(0, 4, 3)];
        let suitability = analyze_for_rle(&sequences);

        let mut output = Vec::new();
        encode_sequences_rle(&sequences, &suitability, &mut output).unwrap();

        // Should have: count(1), mode(0x54), LL code, OF code, ML code, bitstream
        assert!(output.len() >= 5);
        assert_eq!(output[0], 1); // count
        // Per RFC 8878 Section 3.1.1.3.2.1 the mode byte packs LL/OF/ML modes
        // into bits 7-2; RLE for all three streams is 0b01_01_01_00 = 0x54.
        // (Previously asserted 0x15, which pinned a non-conformant encoding.)
        assert_eq!(output[1], 0x54);
    }

    #[test]
    fn test_encoded_sequence_creation() {
        let seq = Sequence::new(5, 8, 10);
        let encoded = EncodedSequence::from_sequence(&seq);

        assert_eq!(encoded.ll_code, 5); // Direct mapping for 0-15
        assert_eq!(encoded.ml_code, 7); // 10 - 3 = 7th match length code
    }
}