Skip to main content

haagenti_zstd/block/
sequences.rs

1//! Sequences section decoding.
2//!
3//! Sequences are LZ77-style commands: (literal_length, offset, match_length).
4//!
5//! ## Repeat Offsets
6//!
7//! Zstd uses repeat offsets to efficiently encode recently-used offsets.
8//! Offset values 1-3 are interpreted as repeat offset references.
9//! Initial repeat offsets are [1, 4, 8] per RFC 8878.
10//!
11//! ## Symbol Compression Modes
12//!
13//! Each of LL/OF/ML can use different compression modes:
14//! - Predefined: Use hardcoded FSE distributions
15//! - RLE: Single symbol repeated for all sequences
16//! - FSE: Custom FSE table encoded in the stream
17//! - Repeat: Reuse previous FSE table (not yet implemented)
18
19use crate::block::literals::LiteralsSection;
20use crate::fse::{BitReader, FseDecoder, FseTable};
21use haagenti_core::{Error, Result};
22
23/// Result type for parse_table_for_mode: (optional (table, bytes_consumed), optional RLE symbol)
24type ParseTableResult = Result<(Option<(FseTable, usize)>, Option<u8>)>;
25
26/// Maximum symbol values for sequence codes (RFC 8878)
27const MAX_LL_SYMBOL: u8 = 35; // Literal length codes 0-35
28const MAX_OF_SYMBOL: u8 = 31; // Offset codes 0-31
29const MAX_ML_SYMBOL: u8 = 52; // Match length codes 0-52
30
31/// Tracks the three repeat offsets used for efficient offset encoding.
32///
33/// Per RFC 8878 Section 3.1.1.5, offset values 1-3 reference recent offsets.
34#[derive(Debug, Clone, Copy)]
35struct RepeatOffsets {
36    offsets: [u32; 3],
37}
38
39impl RepeatOffsets {
40    /// Create with initial values per RFC 8878.
41    fn new() -> Self {
42        Self { offsets: [1, 4, 8] }
43    }
44
45    /// Resolve an offset_value to an actual offset and update repeat offsets.
46    ///
47    /// Per RFC 8878:
48    /// - offset_value 1: Use repeat_offset_1 (special case when literal_length == 0)
49    /// - offset_value 2: Use repeat_offset_2
50    /// - offset_value 3: Use repeat_offset_3 (special case when literal_length == 0)
51    /// - offset_value > 3: New offset = value - 3
52    fn resolve(&mut self, offset_value: u32, literal_length: u32) -> u32 {
53        if offset_value == 0 {
54            // Invalid per spec, but handle gracefully
55            1
56        } else if offset_value <= 3 {
57            // Repeat offset reference
58            if literal_length == 0 {
59                // Special case: when LL=0, offset codes are shifted
60                match offset_value {
61                    1 => {
62                        // Use repeat_offset_2
63                        let offset = self.offsets[1];
64                        // Swap offsets[0] and offsets[1]
65                        self.offsets.swap(0, 1);
66                        offset
67                    }
68                    2 => {
69                        // Use repeat_offset_3
70                        let offset = self.offsets[2];
71                        // Rotate: [2] -> [0], [0] -> [1], [1] -> [2]
72                        let temp = self.offsets[2];
73                        self.offsets[2] = self.offsets[1];
74                        self.offsets[1] = self.offsets[0];
75                        self.offsets[0] = temp;
76                        offset
77                    }
78                    3 => {
79                        // Use repeat_offset_1 - 1 (with minimum of 1)
80                        let offset = self.offsets[0].saturating_sub(1).max(1);
81                        // This becomes the new repeat_offset_1
82                        self.offsets[2] = self.offsets[1];
83                        self.offsets[1] = self.offsets[0];
84                        self.offsets[0] = offset;
85                        offset
86                    }
87                    _ => unreachable!(),
88                }
89            } else {
90                // Normal case: use the indexed repeat offset
91                let idx = (offset_value - 1) as usize;
92                let offset = self.offsets[idx];
93
94                // Rotate used offset to front
95                if idx > 0 {
96                    let temp = self.offsets[idx];
97                    for i in (1..=idx).rev() {
98                        self.offsets[i] = self.offsets[i - 1];
99                    }
100                    self.offsets[0] = temp;
101                }
102                offset
103            }
104        } else {
105            // New offset: value - 3
106            let offset = offset_value - 3;
107            // Push to front, shift others back
108            self.offsets[2] = self.offsets[1];
109            self.offsets[1] = self.offsets[0];
110            self.offsets[0] = offset;
111            offset
112        }
113    }
114}
115
116/// A single decoded sequence.
117#[derive(Debug, Clone, Copy, PartialEq, Eq)]
118pub struct Sequence {
119    /// Number of literals to copy before the match.
120    pub literal_length: u32,
121    /// Offset back in the output buffer.
122    pub offset: u32,
123    /// Number of bytes to copy from the match position.
124    pub match_length: u32,
125}
126
127impl Sequence {
128    /// Create a new sequence.
129    pub fn new(literal_length: u32, offset: u32, match_length: u32) -> Self {
130        Self {
131            literal_length,
132            offset,
133            match_length,
134        }
135    }
136}
137
138/// Symbol decoding mode for sequences.
139#[derive(Debug, Clone, Copy, PartialEq, Eq)]
140pub enum SymbolMode {
141    /// Predefined FSE distribution.
142    Predefined,
143    /// RLE mode - single symbol repeated.
144    Rle,
145    /// FSE compressed.
146    Fse,
147    /// Repeat previous FSE table.
148    Repeat,
149}
150
151impl SymbolMode {
152    /// Parse mode from 2-bit field.
153    pub fn from_field(field: u8) -> Self {
154        match field {
155            0 => SymbolMode::Predefined,
156            1 => SymbolMode::Rle,
157            2 => SymbolMode::Fse,
158            3 => SymbolMode::Repeat,
159            _ => unreachable!(),
160        }
161    }
162}
163
164/// Parsed sequences section.
165#[derive(Debug, Clone)]
166pub struct SequencesSection {
167    /// Number of sequences.
168    pub num_sequences: usize,
169    /// The decoded sequences.
170    pub sequences: Vec<Sequence>,
171}
172
173impl SequencesSection {
174    /// Parse a sequences section.
175    ///
176    /// # Arguments
177    /// * `input` - Input data starting at sequences section
178    /// * `literals` - Previously parsed literals section (for context)
179    pub fn parse(input: &[u8], _literals: &LiteralsSection) -> Result<Self> {
180        if input.is_empty() {
181            // No sequences is valid
182            return Ok(Self {
183                num_sequences: 0,
184                sequences: Vec::new(),
185            });
186        }
187
188        // Parse number of sequences
189        let (num_sequences, count_header_size) = Self::parse_sequence_count(input)?;
190
191        if num_sequences == 0 {
192            return Ok(Self {
193                num_sequences: 0,
194                sequences: Vec::new(),
195            });
196        }
197
198        if input.len() < count_header_size + 1 {
199            return Err(Error::corrupted("Sequences section truncated"));
200        }
201
202        // Parse symbol compression modes
203        // Per RFC 8878 Section 3.1.1.4:
204        //   bits[1:0] = Literals_Lengths_Mode
205        //   bits[3:2] = Offsets_Mode
206        //   bits[5:4] = Match_Lengths_Mode
207        //   bits[7:6] = Reserved (must be 0)
208        let mode_byte = input[count_header_size];
209        let ll_mode = SymbolMode::from_field(mode_byte & 0x03);
210        let of_mode = SymbolMode::from_field((mode_byte >> 2) & 0x03);
211        let ml_mode = SymbolMode::from_field((mode_byte >> 4) & 0x03);
212
213        let mut pos = count_header_size + 1;
214
215        // Check for all-RLE special case (fast path)
216        if ll_mode == SymbolMode::Rle && of_mode == SymbolMode::Rle && ml_mode == SymbolMode::Rle {
217            if input.len() < pos + 3 {
218                return Err(Error::corrupted("RLE sequence symbols truncated"));
219            }
220            let ll_sym = input[pos];
221            let of_sym = input[pos + 1];
222            let ml_sym = input[pos + 2];
223            pos += 3;
224
225            return Self::decode_rle_sequences(
226                &input[pos..],
227                num_sequences,
228                ll_sym,
229                of_sym,
230                ml_sym,
231            );
232        }
233
234        // Parse each FSE table based on its mode
235        let (ll_table, ll_rle_sym) = Self::parse_table_for_mode(
236            ll_mode,
237            &input[pos..],
238            MAX_LL_SYMBOL,
239            "Literal Length",
240            &PREDEFINED_LL_DISTRIBUTION,
241            PREDEFINED_LL_ACCURACY_LOG,
242        )?;
243        pos += ll_table.as_ref().map_or(0, |(_, consumed)| *consumed);
244
245        let (of_table, of_rle_sym) = Self::parse_table_for_mode(
246            of_mode,
247            &input[pos..],
248            MAX_OF_SYMBOL,
249            "Offset",
250            &PREDEFINED_OF_DISTRIBUTION,
251            PREDEFINED_OF_ACCURACY_LOG,
252        )?;
253        pos += of_table.as_ref().map_or(0, |(_, consumed)| *consumed);
254
255        let (ml_table, ml_rle_sym) = Self::parse_table_for_mode(
256            ml_mode,
257            &input[pos..],
258            MAX_ML_SYMBOL,
259            "Match Length",
260            &PREDEFINED_ML_DISTRIBUTION,
261            PREDEFINED_ML_ACCURACY_LOG,
262        )?;
263        pos += ml_table.as_ref().map_or(0, |(_, consumed)| *consumed);
264
265        // If any are RLE, decode with RLE symbols
266        let has_rle = ll_rle_sym.is_some() || of_rle_sym.is_some() || ml_rle_sym.is_some();
267
268        // For mixed modes, we need the tables
269        let ll = ll_table
270            .map(|(t, _)| t)
271            .or_else(|| {
272                FseTable::from_predefined(&PREDEFINED_LL_DISTRIBUTION, PREDEFINED_LL_ACCURACY_LOG)
273                    .ok()
274            })
275            .ok_or_else(|| Error::corrupted("Failed to build LL table"))?;
276
277        let of = of_table
278            .map(|(t, _)| t)
279            .or_else(|| {
280                FseTable::from_predefined(&PREDEFINED_OF_DISTRIBUTION, PREDEFINED_OF_ACCURACY_LOG)
281                    .ok()
282            })
283            .ok_or_else(|| Error::corrupted("Failed to build OF table"))?;
284
285        let ml = ml_table
286            .map(|(t, _)| t)
287            .or_else(|| {
288                FseTable::from_predefined(&PREDEFINED_ML_DISTRIBUTION, PREDEFINED_ML_ACCURACY_LOG)
289                    .ok()
290            })
291            .ok_or_else(|| Error::corrupted("Failed to build ML table"))?;
292
293        // Handle mixed RLE/FSE modes
294        if has_rle {
295            return Self::decode_mixed_sequences(
296                &input[pos..],
297                num_sequences,
298                &ll,
299                ll_rle_sym,
300                &of,
301                of_rle_sym,
302                &ml,
303                ml_rle_sym,
304            );
305        }
306
307        let (ll_table, of_table, ml_table) = (ll, of, ml);
308
309        // Decode sequences from bitstream
310        let bitstream_data = &input[pos..];
311        Self::decode_fse_sequences(
312            bitstream_data,
313            num_sequences,
314            &ll_table,
315            &of_table,
316            &ml_table,
317        )
318    }
319
320    /// Decode sequences using FSE tables.
321    fn decode_fse_sequences(
322        data: &[u8],
323        num_sequences: usize,
324        ll_table: &FseTable,
325        of_table: &FseTable,
326        ml_table: &FseTable,
327    ) -> Result<Self> {
328        if data.is_empty() {
329            return Err(Error::corrupted("Empty sequence bitstream"));
330        }
331
332        // Initialize bit reader from end (backward bitstream)
333        let mut bits = BitReader::new(data);
334        bits.init_from_end()?;
335
336        // Initialize FSE decoders
337        let mut ll_decoder = FseDecoder::new(ll_table);
338        let mut of_decoder = FseDecoder::new(of_table);
339        let mut ml_decoder = FseDecoder::new(ml_table);
340
341        // Read initial states (MSB-first from sentinel)
342        // Per RFC 8878: read LL, OF, ML order
343        ll_decoder.init_state(&mut bits)?;
344        of_decoder.init_state(&mut bits)?;
345        ml_decoder.init_state(&mut bits)?;
346
347        // Switch to LSB-first mode for reading extra bits
348        // Extra bits are at the beginning of the bitstream (read from bit 0 up)
349        bits.switch_to_lsb_mode()?;
350
351        let mut sequences = Vec::with_capacity(num_sequences);
352        let mut repeat_offsets = RepeatOffsets::new();
353
354        for i in 0..num_sequences {
355            // Per RFC 8878 and zstd reference implementation:
356            // 1. Get symbols from current state (no bits read)
357            // 2. Read extra bits in REVERSE of decode order: LL, ML, OF (from LSB)
358            //    The encoder writes them in this order, so we read them back the same way.
359            // 3. Update states with FSE bits: LL, ML, OF order (skip for last sequence)
360            let is_last = i == num_sequences - 1;
361
362            // Step 1: Peek symbols/values from current states
363            let ll_code = ll_decoder.peek_symbol();
364            let of_code = of_decoder.peek_symbol();
365            let _ml_code = ml_decoder.peek_symbol();
366
367            // Step 2: Read extra bits in LL, ML, OF order
368            // This matches our encoder write order for internal consistency.
369
370            // LL extra bits (read first from LSB)
371            let ll_extra_bits = if ll_code < LITERAL_LENGTH_BASELINE.len() as u8 {
372                let (bits_needed, _) = LITERAL_LENGTH_BASELINE[ll_code as usize];
373                if bits_needed > 0 {
374                    bits.read_bits(bits_needed as usize)?
375                } else {
376                    0
377                }
378            } else {
379                0
380            };
381
382            // ML extra bits (read second)
383            // IMPORTANT: We must read seq_base BEFORE update_state, as it comes from current state
384            let ml_seq_extra_bits = ml_decoder.peek_seq_extra_bits();
385            let ml_seq_base = ml_decoder.peek_seq_base();
386            let ml_extra_bits = if ml_seq_extra_bits > 0 {
387                bits.read_bits(ml_seq_extra_bits as usize)?
388            } else {
389                0
390            };
391
392            // OF extra bits (read last)
393            let of_num_bits = offset_code_extra_bits(of_code);
394            let of_extra_bits = if of_num_bits > 0 {
395                bits.read_bits(of_num_bits as usize)?
396            } else {
397                0
398            };
399
400            // Step 3: Update states with FSE bits (skip for last sequence)
401            // Order per zstd: LL, ML, OF (matching encoder write order)
402            if !is_last {
403                ll_decoder.update_state(&mut bits)?;
404                ml_decoder.update_state(&mut bits)?;
405                of_decoder.update_state(&mut bits)?;
406            }
407
408            // Decode final values
409            let literal_length = decode_literal_length(ll_code, ll_extra_bits);
410            // Match length = seq_base + extra bits (seq_base was read BEFORE update_state)
411            let match_length = ml_seq_base + ml_extra_bits;
412            let offset_value = decode_offset(of_code, of_extra_bits);
413
414            // Convert offset_value to actual offset using repeat offset handling
415            let offset = repeat_offsets.resolve(offset_value, literal_length);
416
417            sequences.push(Sequence::new(literal_length, offset, match_length));
418        }
419
420        Ok(Self {
421            num_sequences,
422            sequences,
423        })
424    }
425
426    /// Decode sequences in RLE mode (single repeated symbol for each type).
427    fn decode_rle_sequences(
428        data: &[u8],
429        num_sequences: usize,
430        ll_sym: u8,
431        of_sym: u8,
432        ml_sym: u8,
433    ) -> Result<Self> {
434        if data.is_empty() && num_sequences > 0 {
435            return Err(Error::corrupted("Empty RLE sequence data"));
436        }
437
438        let mut bits = BitReader::new(data);
439        if !data.is_empty() {
440            // RLE mode: extra bits only (no FSE states), read LSB-first
441            bits.init_fse()?;
442        }
443
444        let mut sequences = Vec::with_capacity(num_sequences);
445        let mut repeat_offsets = RepeatOffsets::new();
446
447        for _ in 0..num_sequences {
448            // Read extra bits in LL, ML, OF order (forward from LSB)
449
450            // LL extra bits (read first from LSB)
451            let ll_extra_bits = if ll_sym < LITERAL_LENGTH_BASELINE.len() as u8 {
452                let (bits_needed, _) = LITERAL_LENGTH_BASELINE[ll_sym as usize];
453                if bits_needed > 0 {
454                    bits.read_bits(bits_needed as usize)?
455                } else {
456                    0
457                }
458            } else {
459                0
460            };
461
462            // ML extra bits (read second)
463            let ml_extra_bits = if ml_sym < MATCH_LENGTH_BASELINE.len() as u8 {
464                let (bits_needed, _) = MATCH_LENGTH_BASELINE[ml_sym as usize];
465                if bits_needed > 0 {
466                    bits.read_bits(bits_needed as usize)?
467                } else {
468                    0
469                }
470            } else {
471                0
472            };
473
474            // OF extra bits (read last)
475            let of_num_bits = offset_code_extra_bits(of_sym);
476            let of_extra_bits = if of_num_bits > 0 {
477                bits.read_bits(of_num_bits as usize)?
478            } else {
479                0
480            };
481
482            let literal_length = decode_literal_length(ll_sym, ll_extra_bits);
483            let match_length = decode_match_length(ml_sym, ml_extra_bits);
484            let offset_value = decode_offset(of_sym, of_extra_bits);
485
486            // Convert offset_value to actual offset using repeat offset handling
487            let offset = repeat_offsets.resolve(offset_value, literal_length);
488
489            sequences.push(Sequence::new(literal_length, offset, match_length));
490        }
491
492        Ok(Self {
493            num_sequences,
494            sequences,
495        })
496    }
497
498    /// Parse FSE table or RLE symbol based on mode.
499    ///
500    /// Returns (Some((table, bytes_consumed)), None) for FSE/Predefined modes,
501    /// or (None, Some(rle_symbol)) for RLE mode.
502    fn parse_table_for_mode(
503        mode: SymbolMode,
504        data: &[u8],
505        max_symbol: u8,
506        name: &str,
507        predefined_dist: &[i16],
508        predefined_log: u8,
509    ) -> ParseTableResult {
510        match mode {
511            SymbolMode::Predefined => {
512                let table = FseTable::from_predefined(predefined_dist, predefined_log)?;
513                Ok((Some((table, 0)), None))
514            }
515            SymbolMode::Rle => {
516                if data.is_empty() {
517                    return Err(Error::corrupted(format!("{} RLE symbol missing", name)));
518                }
519                // RLE consumes 1 byte for the symbol, return in tuple for position tracking
520                Ok((
521                    Some((
522                        FseTable::from_predefined(predefined_dist, predefined_log)?,
523                        1,
524                    )),
525                    Some(data[0]),
526                ))
527            }
528            SymbolMode::Fse => {
529                let (table, consumed) = FseTable::parse(data, max_symbol)?;
530                Ok((Some((table, consumed)), None))
531            }
532            SymbolMode::Repeat => {
533                // Repeat mode requires storing previous tables, not yet implemented
534                Err(Error::Unsupported(format!(
535                    "{} Repeat mode not yet implemented",
536                    name
537                )))
538            }
539        }
540    }
541
542    /// Decode sequences with mixed FSE/RLE modes.
543    ///
544    /// Some symbols come from RLE (fixed), others from FSE tables.
545    #[allow(clippy::too_many_arguments)]
546    fn decode_mixed_sequences(
547        data: &[u8],
548        num_sequences: usize,
549        ll_table: &FseTable,
550        ll_rle: Option<u8>,
551        of_table: &FseTable,
552        of_rle: Option<u8>,
553        ml_table: &FseTable,
554        ml_rle: Option<u8>,
555    ) -> Result<Self> {
556        if data.is_empty() && num_sequences > 0 {
557            return Err(Error::corrupted("Empty mixed sequence data"));
558        }
559
560        let mut bits = BitReader::new(data);
561        if !data.is_empty() {
562            bits.init_from_end()?;
563        }
564
565        // Initialize FSE decoders for non-RLE streams
566        let mut ll_decoder = if ll_rle.is_none() {
567            let mut d = FseDecoder::new(ll_table);
568            d.init_state(&mut bits)?;
569            Some(d)
570        } else {
571            None
572        };
573
574        let mut of_decoder = if of_rle.is_none() {
575            let mut d = FseDecoder::new(of_table);
576            d.init_state(&mut bits)?;
577            Some(d)
578        } else {
579            None
580        };
581
582        let mut ml_decoder = if ml_rle.is_none() {
583            let mut d = FseDecoder::new(ml_table);
584            d.init_state(&mut bits)?;
585            Some(d)
586        } else {
587            None
588        };
589
590        // Switch to LSB-first mode for reading extra bits
591        bits.switch_to_lsb_mode()?;
592
593        let mut sequences = Vec::with_capacity(num_sequences);
594        let mut repeat_offsets = RepeatOffsets::new();
595
596        for _ in 0..num_sequences {
597            // Get codes from FSE or RLE
598            let of_code = if let Some(ref mut dec) = of_decoder {
599                dec.peek_symbol()
600            } else {
601                of_rle.unwrap()
602            };
603
604            let ml_code = if let Some(ref mut dec) = ml_decoder {
605                dec.decode_symbol(&mut bits)?
606            } else {
607                ml_rle.unwrap()
608            };
609
610            let ll_code = if let Some(ref mut dec) = ll_decoder {
611                dec.decode_symbol(&mut bits)?
612            } else {
613                ll_rle.unwrap()
614            };
615
616            // Read extra bits in LL, ML, OF order
617            let ll_extra_bits = if ll_code < LITERAL_LENGTH_BASELINE.len() as u8 {
618                let (bits_needed, _) = LITERAL_LENGTH_BASELINE[ll_code as usize];
619                if bits_needed > 0 {
620                    bits.read_bits(bits_needed as usize)?
621                } else {
622                    0
623                }
624            } else {
625                0
626            };
627
628            let ml_extra_bits = if ml_code < MATCH_LENGTH_BASELINE.len() as u8 {
629                let (bits_needed, _) = MATCH_LENGTH_BASELINE[ml_code as usize];
630                if bits_needed > 0 {
631                    bits.read_bits(bits_needed as usize)?
632                } else {
633                    0
634                }
635            } else {
636                0
637            };
638
639            let of_num_bits = offset_code_extra_bits(of_code);
640            let of_extra_bits = if of_num_bits > 0 {
641                bits.read_bits(of_num_bits as usize)?
642            } else {
643                0
644            };
645
646            let literal_length = decode_literal_length(ll_code, ll_extra_bits);
647            let match_length = decode_match_length(ml_code, ml_extra_bits);
648            let offset_value = decode_offset(of_code, of_extra_bits);
649            let offset = repeat_offsets.resolve(offset_value, literal_length);
650
651            sequences.push(Sequence::new(literal_length, offset, match_length));
652        }
653
654        Ok(Self {
655            num_sequences,
656            sequences,
657        })
658    }
659
660    /// Parse the sequence count from the header.
661    fn parse_sequence_count(input: &[u8]) -> Result<(usize, usize)> {
662        if input.is_empty() {
663            return Err(Error::corrupted("Empty sequences header"));
664        }
665
666        let byte0 = input[0] as usize;
667
668        if byte0 == 0 {
669            // No sequences
670            Ok((0, 1))
671        } else if byte0 < 128 {
672            // 1 byte: count = byte0
673            Ok((byte0, 1))
674        } else if byte0 < 255 {
675            // 2 bytes: count = ((byte0 - 128) << 8) + byte1
676            if input.len() < 2 {
677                return Err(Error::corrupted("Sequences count truncated"));
678            }
679            let count = ((byte0 - 128) << 8) + (input[1] as usize);
680            Ok((count, 2))
681        } else {
682            // 3 bytes: count = byte1 + (byte2 << 8) + 0x7F00
683            if input.len() < 3 {
684                return Err(Error::corrupted("Sequences count truncated"));
685            }
686            let count = (input[1] as usize) + ((input[2] as usize) << 8) + 0x7F00;
687            Ok((count, 3))
688        }
689    }
690}
691
692// =============================================================================
693// Predefined FSE Distributions (RFC 8878)
694// =============================================================================
695
696/// Predefined literal length distribution (accuracy log = 6).
697/// 36 symbols with normalized frequencies summing to 64.
698pub const PREDEFINED_LL_DISTRIBUTION: [i16; 36] = [
699    4, 3, 2, 2, 2, 2, 2, 2, // Codes 0-7
700    2, 2, 2, 2, 2, 1, 1, 1, // Codes 8-15
701    2, 2, 2, 2, 2, 2, 2, 2, // Codes 16-23
702    2, 3, 2, 1, 1, 1, 1, 1, // Codes 24-31
703    -1, -1, -1, -1, // Codes 32-35 (less than 1)
704];
705
706/// Predefined offset distribution (accuracy log = 5).
707/// 29 symbols with normalized frequencies summing to 32.
708pub const PREDEFINED_OF_DISTRIBUTION: [i16; 29] = [
709    1, 1, 1, 1, 1, 1, 2, 2, // Codes 0-7
710    2, 1, 1, 1, 1, 1, 1, 1, // Codes 8-15
711    1, 1, 1, 1, 1, 1, 1, 1, // Codes 16-23
712    -1, -1, -1, -1, -1, // Codes 24-28 (less than 1)
713];
714
715/// Predefined match length distribution (accuracy log = 6).
716/// 53 symbols with normalized frequencies summing to 64.
717pub const PREDEFINED_ML_DISTRIBUTION: [i16; 53] = [
718    1, 4, 3, 2, 2, 2, 2, 2, // Codes 0-7
719    2, 1, 1, 1, 1, 1, 1, 1, // Codes 8-15
720    1, 1, 1, 1, 1, 1, 1, 1, // Codes 16-23
721    1, 1, 1, 1, 1, 1, 1, 1, // Codes 24-31
722    1, 1, 1, 1, 1, 1, 1, 1, // Codes 32-39
723    1, 1, 1, 1, 1, 1, -1, -1, // Codes 40-47
724    -1, -1, -1, -1, -1, // Codes 48-52 (less than 1)
725];
726
727/// Predefined accuracy log for literal lengths.
728pub const PREDEFINED_LL_ACCURACY_LOG: u8 = 6;
729
730/// Predefined accuracy log for offsets.
731pub const PREDEFINED_OF_ACCURACY_LOG: u8 = 5;
732
733/// Predefined accuracy log for match lengths.
734pub const PREDEFINED_ML_ACCURACY_LOG: u8 = 6;
735
736// =============================================================================
737// Baseline Tables (RFC 8878)
738// =============================================================================
739
740/// Literal length code to (extra bits, baseline) mapping.
741pub const LITERAL_LENGTH_BASELINE: [(u8, u32); 36] = [
742    (0, 0),
743    (0, 1),
744    (0, 2),
745    (0, 3),
746    (0, 4),
747    (0, 5),
748    (0, 6),
749    (0, 7),
750    (0, 8),
751    (0, 9),
752    (0, 10),
753    (0, 11),
754    (0, 12),
755    (0, 13),
756    (0, 14),
757    (0, 15),
758    (1, 16),
759    (1, 18),
760    (1, 20),
761    (1, 22),
762    (2, 24),
763    (2, 28),
764    (3, 32),
765    (3, 40),
766    (4, 48),
767    (6, 64),
768    (7, 128),
769    (8, 256),
770    (9, 512),
771    (10, 1024),
772    (11, 2048),
773    (12, 4096),
774    (13, 8192),
775    (14, 16384),
776    (15, 32768),
777    (16, 65536),
778];
779
780/// Match length code to (extra bits, baseline) mapping using ZSTD's predefined values.
781///
782/// IMPORTANT: This uses zstd's predefined values, NOT RFC 8878 Table 6.
783/// zstd's predefined ML table differs from RFC starting at code 43:
784/// - Code 43: zstd uses 7 bits (baseline 131), RFC uses 5 bits
785/// - Code 44: zstd uses 8 bits (baseline 259), RFC uses 6 bits (baseline 163)
786/// - Code 45+: All shifted to accommodate zstd's larger ranges
787///
788/// Each entry is (Number_of_Bits, Baseline) for match length codes 0-52.
789pub const MATCH_LENGTH_BASELINE: [(u8, u32); 53] = [
790    // Codes 0-31: No extra bits, match_length = baseline (3-34)
791    (0, 3),
792    (0, 4),
793    (0, 5),
794    (0, 6),
795    (0, 7),
796    (0, 8),
797    (0, 9),
798    (0, 10),
799    (0, 11),
800    (0, 12),
801    (0, 13),
802    (0, 14),
803    (0, 15),
804    (0, 16),
805    (0, 17),
806    (0, 18),
807    (0, 19),
808    (0, 20),
809    (0, 21),
810    (0, 22),
811    (0, 23),
812    (0, 24),
813    (0, 25),
814    (0, 26),
815    (0, 27),
816    (0, 28),
817    (0, 29),
818    (0, 30),
819    (0, 31),
820    (0, 32),
821    (0, 33),
822    (0, 34),
823    // Codes 32-35: 1 extra bit (matches RFC)
824    (1, 35),
825    (1, 37),
826    (1, 39),
827    (1, 41),
828    // Codes 36-37: 2 extra bits (matches RFC)
829    (2, 43),
830    (2, 47),
831    // Codes 38-39: 3 extra bits (matches RFC)
832    (3, 51),
833    (3, 59),
834    // Codes 40-41: 4 extra bits (matches RFC)
835    (4, 67),
836    (4, 83),
837    // Code 42: 5 extra bits (matches RFC)
838    (5, 99),
839    // Code 43: 7 extra bits (DIFFERS from RFC's 5 bits!)
840    (7, 131),
841    // Code 44: 8 extra bits (DIFFERS from RFC's 6 bits with baseline 163!)
842    (8, 259),
843    // Code 45: 9 extra bits (zstd: baseline 515)
844    (9, 515),
845    // Code 46: 10 extra bits (zstd: baseline 1027)
846    (10, 1027),
847    // Code 47: 11 extra bits (zstd: baseline 2051)
848    (11, 2051),
849    // Code 48: 12 extra bits (zstd: baseline 4099)
850    (12, 4099),
851    // Code 49: 13 extra bits (zstd: baseline 8195)
852    (13, 8195),
853    // Code 50: 14 extra bits (zstd: baseline 16387)
854    (14, 16387),
855    // Code 51: 15 extra bits (zstd: baseline 32771)
856    (15, 32771),
857    // Code 52: 16 extra bits (zstd: baseline 65539)
858    (16, 65539),
859];
860
861/// Decode literal length from code and extra bits.
862pub fn decode_literal_length(code: u8, extra_bits: u32) -> u32 {
863    if code as usize >= LITERAL_LENGTH_BASELINE.len() {
864        return 0;
865    }
866    let (bits, baseline) = LITERAL_LENGTH_BASELINE[code as usize];
867    if bits == 0 {
868        baseline
869    } else {
870        baseline + (extra_bits & ((1 << bits) - 1))
871    }
872}
873
874/// Decode match length from code and extra bits.
875pub fn decode_match_length(code: u8, extra_bits: u32) -> u32 {
876    if code as usize >= MATCH_LENGTH_BASELINE.len() {
877        return 3; // Minimum match length
878    }
879    let (bits, baseline) = MATCH_LENGTH_BASELINE[code as usize];
880    if bits == 0 {
881        baseline
882    } else {
883        baseline + (extra_bits & ((1 << bits) - 1))
884    }
885}
886
887/// Decode offset from code and extra bits per RFC 8878 Table 7.
888///
889/// Formula: Offset_Value = (1 << Offset_Code) + Extra_Bits
890///
891/// The returned offset_value is then interpreted by RepeatOffsets::resolve():
892/// - offset_value 1-3: use repeat offset
893/// - offset_value > 3: actual_offset = offset_value - 3
894pub fn decode_offset(code: u8, extra_bits: u32) -> u32 {
895    let code = code.min(31); // Clamp to prevent overflow
896    (1u32 << code) + extra_bits
897}
898
899/// Get the number of extra bits for an offset code per RFC 8878 Table 7.
900///
901/// Number_of_Extra_Bits = Offset_Code
902pub fn offset_code_extra_bits(code: u8) -> u8 {
903    code.min(31)
904}
905
906/// Build predefined FSE tables for sequence decoding.
907#[cfg(test)]
908fn build_predefined_tables() -> Result<(FseTable, FseTable, FseTable)> {
909    let ll_table = FseTable::build(
910        &PREDEFINED_LL_DISTRIBUTION,
911        PREDEFINED_LL_ACCURACY_LOG,
912        PREDEFINED_LL_DISTRIBUTION.len() as u8,
913    )?;
914
915    let of_table = FseTable::build(
916        &PREDEFINED_OF_DISTRIBUTION,
917        PREDEFINED_OF_ACCURACY_LOG,
918        PREDEFINED_OF_DISTRIBUTION.len() as u8,
919    )?;
920
921    let ml_table = FseTable::build(
922        &PREDEFINED_ML_DISTRIBUTION,
923        PREDEFINED_ML_ACCURACY_LOG,
924        PREDEFINED_ML_DISTRIBUTION.len() as u8,
925    )?;
926
927    Ok((ll_table, of_table, ml_table))
928}
929
930// =============================================================================
931// Tests
932// =============================================================================
933
934#[cfg(test)]
935mod tests {
936    use super::*;
937
938    #[test]
939    fn test_sequence_creation() {
940        let seq = Sequence::new(10, 5, 20);
941        assert_eq!(seq.literal_length, 10);
942        assert_eq!(seq.offset, 5);
943        assert_eq!(seq.match_length, 20);
944    }
945
946    #[test]
947    fn test_symbol_mode_parsing() {
948        assert_eq!(SymbolMode::from_field(0), SymbolMode::Predefined);
949        assert_eq!(SymbolMode::from_field(1), SymbolMode::Rle);
950        assert_eq!(SymbolMode::from_field(2), SymbolMode::Fse);
951        assert_eq!(SymbolMode::from_field(3), SymbolMode::Repeat);
952    }
953
954    #[test]
955    fn test_sequence_count_zero() {
956        let (count, size) = SequencesSection::parse_sequence_count(&[0]).unwrap();
957        assert_eq!(count, 0);
958        assert_eq!(size, 1);
959    }
960
961    #[test]
962    fn test_sequence_count_small() {
963        // Count = 50 (single byte)
964        let (count, size) = SequencesSection::parse_sequence_count(&[50]).unwrap();
965        assert_eq!(count, 50);
966        assert_eq!(size, 1);
967    }
968
969    #[test]
970    fn test_sequence_count_medium() {
971        // Count in range 128-32767 uses 2 bytes
972        // Count = 300: byte0 = 128 + (300 >> 8) = 129, byte1 = 300 & 0xFF = 44
973        // Actually: count = ((byte0 - 128) << 8) + byte1
974        // 300 = ((129 - 128) << 8) + 44 = 256 + 44 = 300 ✓
975        let (count, size) = SequencesSection::parse_sequence_count(&[129, 44]).unwrap();
976        assert_eq!(count, 300);
977        assert_eq!(size, 2);
978    }
979
980    #[test]
981    fn test_sequence_count_large() {
982        // Count >= 0x7F00 uses 3 bytes
983        // byte0 = 255, count = byte1 + (byte2 << 8) + 0x7F00
984        // For count = 0x7F00: byte1 = 0, byte2 = 0
985        let (count, size) = SequencesSection::parse_sequence_count(&[255, 0, 0]).unwrap();
986        assert_eq!(count, 0x7F00);
987        assert_eq!(size, 3);
988
989        // For count = 0x8000: need 0x8000 - 0x7F00 = 0x100 = 256
990        // byte1 = 0, byte2 = 1
991        let (count, size) = SequencesSection::parse_sequence_count(&[255, 0, 1]).unwrap();
992        assert_eq!(count, 0x8000);
993        assert_eq!(size, 3);
994    }
995
996    #[test]
997    fn test_literal_length_decoding() {
998        // Code 0-15: no extra bits, value = code
999        assert_eq!(decode_literal_length(0, 0), 0);
1000        assert_eq!(decode_literal_length(15, 0), 15);
1001
1002        // Code 16: 1 extra bit, baseline 16
1003        assert_eq!(decode_literal_length(16, 0), 16);
1004        assert_eq!(decode_literal_length(16, 1), 17);
1005
1006        // Code 20: 2 extra bits, baseline 24
1007        assert_eq!(decode_literal_length(20, 0), 24);
1008        assert_eq!(decode_literal_length(20, 3), 27);
1009    }
1010
1011    #[test]
1012    fn test_match_length_decoding() {
1013        // Code 0: baseline 3, no extra bits
1014        assert_eq!(decode_match_length(0, 0), 3);
1015
1016        // Code 31: baseline 34, no extra bits
1017        assert_eq!(decode_match_length(31, 0), 34);
1018
1019        // Code 32: baseline 35, 1 extra bit
1020        assert_eq!(decode_match_length(32, 0), 35);
1021        assert_eq!(decode_match_length(32, 1), 36);
1022    }
1023
1024    #[test]
1025    fn test_offset_decoding() {
1026        // Per RFC 8878 Table 7:
1027        // Offset_Value = (1 << Offset_Code) + Extra_Bits
1028
1029        // Code 0: values 1-2
1030        assert_eq!(decode_offset(0, 0), 1);
1031        assert_eq!(decode_offset(0, 1), 2); // Note: only 0 bits for code 0, so extra=1 exceeds range
1032
1033        // Code 1: values 2-3
1034        assert_eq!(decode_offset(1, 0), 2);
1035        assert_eq!(decode_offset(1, 1), 3);
1036
1037        // Code 2: values 4-7
1038        assert_eq!(decode_offset(2, 0), 4);
1039        assert_eq!(decode_offset(2, 3), 7);
1040
1041        // Code 3: values 8-15
1042        assert_eq!(decode_offset(3, 0), 8);
1043        assert_eq!(decode_offset(3, 7), 15);
1044
1045        // Code 10: values 1024-2047
1046        assert_eq!(decode_offset(10, 0), 1024);
1047        assert_eq!(decode_offset(10, 500), 1524);
1048    }
1049
1050    #[test]
1051    fn test_empty_sequences() {
1052        let literals = LiteralsSection::new_raw(vec![]);
1053        let result = SequencesSection::parse(&[], &literals);
1054        assert!(result.is_ok());
1055        let section = result.unwrap();
1056        assert_eq!(section.num_sequences, 0);
1057        assert!(section.sequences.is_empty());
1058    }
1059
1060    #[test]
1061    fn test_zero_sequences() {
1062        let literals = LiteralsSection::new_raw(vec![]);
1063        let result = SequencesSection::parse(&[0], &literals);
1064        assert!(result.is_ok());
1065        let section = result.unwrap();
1066        assert_eq!(section.num_sequences, 0);
1067    }
1068
1069    #[test]
1070    fn test_predefined_tables_build() {
1071        // Test that predefined tables can be built successfully
1072        let result = build_predefined_tables();
1073        assert!(result.is_ok(), "Build failed: {:?}", result.err());
1074
1075        let (ll_table, of_table, ml_table) = result.unwrap();
1076
1077        // Verify table sizes match expected
1078        assert_eq!(ll_table.accuracy_log(), PREDEFINED_LL_ACCURACY_LOG);
1079        assert_eq!(of_table.accuracy_log(), PREDEFINED_OF_ACCURACY_LOG);
1080        assert_eq!(ml_table.accuracy_log(), PREDEFINED_ML_ACCURACY_LOG);
1081    }
1082
1083    #[test]
1084    fn test_predefined_ll_distribution_sum() {
1085        // Verify the distribution sums to 2^accuracy_log (accounting for -1 values)
1086        let sum: i32 = PREDEFINED_LL_DISTRIBUTION
1087            .iter()
1088            .filter(|&&x| x > 0)
1089            .map(|&x| x as i32)
1090            .sum();
1091        // -1 values represent "less than 1" and need 1 slot each
1092        let less_than_one = PREDEFINED_LL_DISTRIBUTION
1093            .iter()
1094            .filter(|&&x| x == -1)
1095            .count();
1096
1097        // Total should equal 2^6 = 64
1098        assert!(sum + less_than_one as i32 <= 64);
1099    }
1100
1101    #[test]
1102    fn test_predefined_of_distribution_sum() {
1103        let sum: i32 = PREDEFINED_OF_DISTRIBUTION
1104            .iter()
1105            .filter(|&&x| x > 0)
1106            .map(|&x| x as i32)
1107            .sum();
1108        let less_than_one = PREDEFINED_OF_DISTRIBUTION
1109            .iter()
1110            .filter(|&&x| x == -1)
1111            .count();
1112
1113        // Total should equal 2^5 = 32
1114        assert!(sum + less_than_one as i32 <= 32);
1115    }
1116
1117    #[test]
1118    fn test_predefined_ml_distribution_sum() {
1119        let sum: i32 = PREDEFINED_ML_DISTRIBUTION
1120            .iter()
1121            .filter(|&&x| x > 0)
1122            .map(|&x| x as i32)
1123            .sum();
1124        let less_than_one = PREDEFINED_ML_DISTRIBUTION
1125            .iter()
1126            .filter(|&&x| x == -1)
1127            .count();
1128
1129        // Total should equal 2^6 = 64
1130        assert!(sum + less_than_one as i32 <= 64);
1131    }
1132
1133    #[test]
1134    fn test_literal_length_baseline_all_codes() {
1135        // Test all 36 literal length codes
1136        for code in 0..36u8 {
1137            let (bits, baseline) = LITERAL_LENGTH_BASELINE[code as usize];
1138            let result = decode_literal_length(code, 0);
1139            assert_eq!(result, baseline, "Code {} failed", code);
1140
1141            // Test with max extra bits
1142            if bits > 0 {
1143                let max_extra = (1u32 << bits) - 1;
1144                let result_max = decode_literal_length(code, max_extra);
1145                assert_eq!(result_max, baseline + max_extra);
1146            }
1147        }
1148    }
1149
1150    #[test]
1151    fn test_match_length_baseline_all_codes() {
1152        // Test all 53 match length codes
1153        for code in 0..53u8 {
1154            let (bits, baseline) = MATCH_LENGTH_BASELINE[code as usize];
1155            let result = decode_match_length(code, 0);
1156            assert_eq!(result, baseline, "Code {} failed", code);
1157
1158            if bits > 0 {
1159                let max_extra = (1u32 << bits) - 1;
1160                let result_max = decode_match_length(code, max_extra);
1161                assert_eq!(result_max, baseline + max_extra);
1162            }
1163        }
1164    }
1165
1166    #[test]
1167    fn test_offset_decoding_range() {
1168        // Per RFC 8878 Table 7: Offset_Value = (1 << Code) + Extra_Bits
1169        for code in 0..=30u8 {
1170            let base_offset = decode_offset(code, 0);
1171            assert_eq!(base_offset, 1u32 << code, "Code {} failed", code);
1172        }
1173
1174        // Code 31 is clamped to prevent overflow (1 << 31 would overflow u32)
1175        assert_eq!(decode_offset(31, 0), 1u32 << 31);
1176    }
1177
1178    #[test]
1179    fn test_sequences_with_predefined_mode() {
1180        // Build a minimal sequences section with predefined mode
1181        // Mode byte: 0b00000000 = all predefined (LL=0, OF=0, ML=0, reserved=0)
1182        let literals = LiteralsSection::new_raw(vec![]);
1183
1184        // 1 sequence, predefined mode, minimal bitstream
1185        let mut data = vec![1]; // 1 sequence
1186        data.push(0x00); // Mode byte: all predefined
1187
1188        // Add a valid bitstream with sentinel
1189        // The bitstream needs initial states and at least one sequence's worth of data
1190        data.push(0x80); // Minimal bitstream with sentinel
1191
1192        let result = SequencesSection::parse(&data, &literals);
1193        // This might fail due to bitstream issues, but the mode parsing should work
1194        if result.is_err() {
1195            // Expected for minimal bitstream
1196        } else {
1197            let section = result.unwrap();
1198            assert_eq!(section.num_sequences, 1);
1199        }
1200    }
1201
1202    #[test]
1203    fn test_mode_byte_parsing() {
1204        // Verify mode byte layout: LL[7:6], OF[5:4], ML[3:2], reserved[1:0]
1205        // All predefined: 0b00_00_00_00 = 0x00
1206        let mode_byte = 0x00u8;
1207        assert_eq!(
1208            SymbolMode::from_field((mode_byte >> 6) & 0x03),
1209            SymbolMode::Predefined
1210        );
1211        assert_eq!(
1212            SymbolMode::from_field((mode_byte >> 4) & 0x03),
1213            SymbolMode::Predefined
1214        );
1215        assert_eq!(
1216            SymbolMode::from_field((mode_byte >> 2) & 0x03),
1217            SymbolMode::Predefined
1218        );
1219
1220        // All RLE: 0b01_01_01_00 = 0x54
1221        let mode_byte = 0x54u8;
1222        assert_eq!(
1223            SymbolMode::from_field((mode_byte >> 6) & 0x03),
1224            SymbolMode::Rle
1225        );
1226        assert_eq!(
1227            SymbolMode::from_field((mode_byte >> 4) & 0x03),
1228            SymbolMode::Rle
1229        );
1230        assert_eq!(
1231            SymbolMode::from_field((mode_byte >> 2) & 0x03),
1232            SymbolMode::Rle
1233        );
1234
1235        // All FSE compressed: 0b10_10_10_00 = 0xA8
1236        let mode_byte = 0xA8u8;
1237        assert_eq!(
1238            SymbolMode::from_field((mode_byte >> 6) & 0x03),
1239            SymbolMode::Fse
1240        );
1241        assert_eq!(
1242            SymbolMode::from_field((mode_byte >> 4) & 0x03),
1243            SymbolMode::Fse
1244        );
1245        assert_eq!(
1246            SymbolMode::from_field((mode_byte >> 2) & 0x03),
1247            SymbolMode::Fse
1248        );
1249
1250        // All repeat: 0b11_11_11_00 = 0xFC
1251        let mode_byte = 0xFCu8;
1252        assert_eq!(
1253            SymbolMode::from_field((mode_byte >> 6) & 0x03),
1254            SymbolMode::Repeat
1255        );
1256        assert_eq!(
1257            SymbolMode::from_field((mode_byte >> 4) & 0x03),
1258            SymbolMode::Repeat
1259        );
1260        assert_eq!(
1261            SymbolMode::from_field((mode_byte >> 2) & 0x03),
1262            SymbolMode::Repeat
1263        );
1264    }
1265
1266    #[test]
1267    fn test_sequence_count_boundary_127() {
1268        // Boundary at 127: single byte
1269        let (count, size) = SequencesSection::parse_sequence_count(&[127]).unwrap();
1270        assert_eq!(count, 127);
1271        assert_eq!(size, 1);
1272    }
1273
1274    #[test]
1275    fn test_sequence_count_boundary_128() {
1276        // 128 uses 2 bytes: ((128 - 128) << 8) + byte1
1277        // For count = 128: byte0 = 128, byte1 = 128
1278        let (count, size) = SequencesSection::parse_sequence_count(&[128, 128]).unwrap();
1279        assert_eq!(count, 128);
1280        assert_eq!(size, 2);
1281    }
1282
1283    // =========================================================================
1284    // RepeatOffsets Tests
1285    // =========================================================================
1286
1287    #[test]
1288    fn test_repeat_offsets_initial_values() {
1289        let offsets = RepeatOffsets::new();
1290        assert_eq!(offsets.offsets, [1, 4, 8]);
1291    }
1292
1293    #[test]
1294    fn test_repeat_offsets_new_offset() {
1295        let mut offsets = RepeatOffsets::new();
1296
1297        // New offset: value > 3, actual = value - 3
1298        // offset_value 7 -> actual 4, becomes new repeat_offset_1
1299        let result = offsets.resolve(7, 5); // LL=5 (non-zero)
1300        assert_eq!(result, 4); // 7 - 3 = 4
1301        assert_eq!(offsets.offsets, [4, 1, 4]); // 4 pushed to front
1302    }
1303
1304    #[test]
1305    fn test_repeat_offsets_use_first() {
1306        let mut offsets = RepeatOffsets::new();
1307
1308        // Use repeat_offset_1 (value=1) with LL > 0
1309        let result = offsets.resolve(1, 5);
1310        assert_eq!(result, 1); // Initial repeat_offset_1
1311        assert_eq!(offsets.offsets, [1, 4, 8]); // No rotation when using first
1312    }
1313
1314    #[test]
1315    fn test_repeat_offsets_use_second() {
1316        let mut offsets = RepeatOffsets::new();
1317
1318        // Use repeat_offset_2 (value=2) with LL > 0
1319        let result = offsets.resolve(2, 5);
1320        assert_eq!(result, 4); // Initial repeat_offset_2
1321        assert_eq!(offsets.offsets, [4, 1, 8]); // 4 rotated to front
1322    }
1323
1324    #[test]
1325    fn test_repeat_offsets_use_third() {
1326        let mut offsets = RepeatOffsets::new();
1327
1328        // Use repeat_offset_3 (value=3) with LL > 0
1329        let result = offsets.resolve(3, 5);
1330        assert_eq!(result, 8); // Initial repeat_offset_3
1331        assert_eq!(offsets.offsets, [8, 1, 4]); // 8 rotated to front
1332    }
1333
1334    #[test]
1335    fn test_repeat_offsets_ll_zero_special_case_1() {
1336        let mut offsets = RepeatOffsets::new();
1337
1338        // When LL=0 and value=1: use repeat_offset_2 instead, swap positions
1339        let result = offsets.resolve(1, 0);
1340        assert_eq!(result, 4); // Uses repeat_offset_2
1341        assert_eq!(offsets.offsets, [4, 1, 8]); // Swapped positions 0 and 1
1342    }
1343
1344    #[test]
1345    fn test_repeat_offsets_ll_zero_special_case_2() {
1346        let mut offsets = RepeatOffsets::new();
1347
1348        // When LL=0 and value=2: use repeat_offset_3, rotate
1349        let result = offsets.resolve(2, 0);
1350        assert_eq!(result, 8); // Uses repeat_offset_3
1351        assert_eq!(offsets.offsets, [8, 1, 4]); // Rotated
1352    }
1353
1354    #[test]
1355    fn test_repeat_offsets_ll_zero_special_case_3() {
1356        let mut offsets = RepeatOffsets::new();
1357
1358        // When LL=0 and value=3: use repeat_offset_1 - 1
1359        // Initial repeat_offset_1 = 1, so 1 - 1 = 0, but minimum is 1
1360        let result = offsets.resolve(3, 0);
1361        assert_eq!(result, 1); // max(1-1, 1) = 1
1362        assert_eq!(offsets.offsets, [1, 1, 4]); // New value pushed to front
1363    }
1364
1365    #[test]
1366    fn test_repeat_offsets_ll_zero_case_3_larger_offset() {
1367        let mut offsets = RepeatOffsets::new();
1368
1369        // First set a larger repeat_offset_1
1370        offsets.resolve(13, 5); // 13 - 3 = 10
1371        assert_eq!(offsets.offsets[0], 10);
1372
1373        // Now LL=0 and value=3: use 10 - 1 = 9
1374        let result = offsets.resolve(3, 0);
1375        assert_eq!(result, 9);
1376        assert_eq!(offsets.offsets[0], 9);
1377    }
1378
1379    #[test]
1380    fn test_repeat_offsets_sequence_of_operations() {
1381        let mut offsets = RepeatOffsets::new();
1382        // Initial: [1, 4, 8]
1383
1384        // New offset 103 (value 106): 106 - 3 = 103
1385        offsets.resolve(106, 5);
1386        assert_eq!(offsets.offsets, [103, 1, 4]);
1387
1388        // Use repeat_offset_1 (value 1)
1389        let result = offsets.resolve(1, 5);
1390        assert_eq!(result, 103);
1391        assert_eq!(offsets.offsets, [103, 1, 4]); // No change
1392
1393        // Use repeat_offset_2 (value 2)
1394        let result = offsets.resolve(2, 5);
1395        assert_eq!(result, 1);
1396        assert_eq!(offsets.offsets, [1, 103, 4]); // Rotated
1397    }
1398}