Skip to main content

rar_stream/decompress/
huffman.rs

1//! Huffman decoder for RAR compression.
2//!
3//! RAR uses canonical Huffman codes with up to 15-bit code lengths.
4
5use super::{BitReader, DecompressError, Result};
6
7/// Maximum code length in bits.
8pub const MAX_CODE_LENGTH: usize = 15;
9
10/// Table sizes for RAR3/4 format.
11pub const MAINCODE_SIZE: usize = 299;
12pub const OFFSETCODE_SIZE: usize = 60;
13pub const LOWOFFSETCODE_SIZE: usize = 17;
14pub const LENGTHCODE_SIZE: usize = 28;
15pub const HUFFMAN_TABLE_SIZE: usize =
16    MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE + LENGTHCODE_SIZE;
17
18/// Huffman decoding table entry.
19#[derive(Clone, Copy, Default)]
20pub struct HuffmanEntry {
21    /// Symbol value
22    pub symbol: u16,
23    /// Code length in bits
24    pub length: u8,
25}
26
27/// Huffman decoding table.
28/// Uses a lookup table for fast decoding of short codes.
29pub struct HuffmanTable {
30    /// Quick lookup table for codes up to QUICK_BITS
31    quick_table: Vec<HuffmanEntry>,
32    /// Sorted symbols for longer codes
33    symbols: Vec<u16>,
34    /// Code length counts
35    length_counts: [u16; MAX_CODE_LENGTH + 1],
36    /// First code value for each length (right-aligned, canonical)
37    first_code: [u32; MAX_CODE_LENGTH + 1],
38    /// First symbol index for each length (same as unrar's DecodePos)
39    first_symbol: [u16; MAX_CODE_LENGTH + 1],
40    /// Left-aligned upper limit for each length (unrar's DecodeLen)
41    decode_len: [u32; MAX_CODE_LENGTH + 1],
42}
43
44/// Bits for quick lookup table.
45const QUICK_BITS: u32 = 10;
46const QUICK_SIZE: usize = 1 << QUICK_BITS;
47
48impl HuffmanTable {
49    /// Create a new Huffman table from code lengths.
50    pub fn new(lengths: &[u8]) -> Result<Self> {
51        let mut table = Self {
52            quick_table: vec![HuffmanEntry::default(); QUICK_SIZE],
53            symbols: vec![0; lengths.len()],
54            length_counts: [0; MAX_CODE_LENGTH + 1],
55            first_code: [0; MAX_CODE_LENGTH + 1],
56            first_symbol: [0; MAX_CODE_LENGTH + 1],
57            decode_len: [0; MAX_CODE_LENGTH + 1],
58        };
59
60        // Count code lengths
61        for &len in lengths {
62            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
63                table.length_counts[len as usize] += 1;
64            }
65        }
66
67        // Calculate first code for each length (canonical Huffman)
68        // AND decode_len (left-aligned upper limit, like unrar)
69        let mut code = 0u32;
70        let mut upper_limit = 0u32;
71        for i in 1..=MAX_CODE_LENGTH {
72            code = (code + table.length_counts[i - 1] as u32) << 1;
73            table.first_code[i] = code;
74
75            // unrar's DecodeLen calculation
76            upper_limit += table.length_counts[i] as u32;
77            table.decode_len[i] = upper_limit << (16 - i);
78            upper_limit *= 2;
79        }
80
81        // Calculate first symbol index for each length (same as unrar's DecodePos)
82        let mut idx = 0u16;
83        for i in 1..=MAX_CODE_LENGTH {
84            table.first_symbol[i] = idx;
85            idx += table.length_counts[i];
86        }
87
88        // Build symbol list sorted by code (unrar's DecodeNum)
89        let mut indices = table.first_symbol;
90        for (symbol, &len) in lengths.iter().enumerate() {
91            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
92                let i = indices[len as usize] as usize;
93                if i < table.symbols.len() {
94                    table.symbols[i] = symbol as u16;
95                    indices[len as usize] += 1;
96                }
97            }
98        }
99
100        // Build quick lookup table by iterating symbols (avoids O(n²) search)
101        for len in 1..=QUICK_BITS as usize {
102            let start_idx = table.first_symbol[len] as usize;
103            let end_idx = table.first_symbol[len + 1] as usize;
104
105            for (i, &symbol) in table.symbols[start_idx..end_idx].iter().enumerate() {
106                let code = table.first_code[len] + i as u32;
107                let fill_bits = QUICK_BITS - len as u32;
108                let start = (code << fill_bits) as usize;
109                let count = 1usize << fill_bits;
110
111                for j in 0..count {
112                    let entry_idx = start + j;
113                    if entry_idx < QUICK_SIZE {
114                        table.quick_table[entry_idx] = HuffmanEntry {
115                            symbol,
116                            length: len as u8,
117                        };
118                    }
119                }
120            }
121        }
122
123        Ok(table)
124    }
125
126    /// Rebuild the table from new code lengths, reusing existing allocations.
127    pub fn rebuild(&mut self, lengths: &[u8]) -> Result<()> {
128        // Resize symbols vec if needed (no fill - we overwrite valid entries)
129        if self.symbols.len() != lengths.len() {
130            self.symbols.resize(lengths.len(), 0);
131        }
132
133        // Reset array fields
134        self.length_counts = [0; MAX_CODE_LENGTH + 1];
135        self.first_code = [0; MAX_CODE_LENGTH + 1];
136        self.first_symbol = [0; MAX_CODE_LENGTH + 1];
137        self.decode_len = [0; MAX_CODE_LENGTH + 1];
138
139        // Count code lengths
140        for &len in lengths {
141            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
142                self.length_counts[len as usize] += 1;
143            }
144        }
145
146        // Calculate first code for each length (canonical Huffman)
147        // AND decode_len (left-aligned upper limit, like unrar)
148        let mut code = 0u32;
149        let mut upper_limit = 0u32;
150        for i in 1..=MAX_CODE_LENGTH {
151            code = (code + self.length_counts[i - 1] as u32) << 1;
152            self.first_code[i] = code;
153
154            // unrar's DecodeLen calculation
155            upper_limit += self.length_counts[i] as u32;
156            self.decode_len[i] = upper_limit << (16 - i);
157            upper_limit *= 2;
158        }
159
160        // Calculate first symbol index for each length
161        let mut idx = 0u16;
162        for i in 1..=MAX_CODE_LENGTH {
163            self.first_symbol[i] = idx;
164            idx += self.length_counts[i];
165        }
166
167        // Build symbol list sorted by code
168        let mut indices = self.first_symbol;
169        for (symbol, &len) in lengths.iter().enumerate() {
170            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
171                let i = indices[len as usize] as usize;
172                if i < self.symbols.len() {
173                    self.symbols[i] = symbol as u16;
174                    indices[len as usize] += 1;
175                }
176            }
177        }
178
179        // Rebuild quick lookup table - MUST clear to avoid stale entries
180        self.quick_table.fill(HuffmanEntry::default());
181
182        // Build quick table by iterating through symbols list (already sorted by code)
183        // This avoids O(n²) linear search for each symbol
184        for len in 1..=QUICK_BITS as usize {
185            let start_idx = self.first_symbol[len] as usize;
186            let end_idx = self.first_symbol[len + 1] as usize;
187
188            for (i, &symbol) in self.symbols[start_idx..end_idx].iter().enumerate() {
189                let code = self.first_code[len] + i as u32;
190                let fill_bits = QUICK_BITS - len as u32;
191                let start = (code << fill_bits) as usize;
192                let count = 1usize << fill_bits;
193
194                for j in 0..count {
195                    let entry_idx = start + j;
196                    if entry_idx < QUICK_SIZE {
197                        self.quick_table[entry_idx] = HuffmanEntry {
198                            symbol,
199                            length: len as u8,
200                        };
201                    }
202                }
203            }
204        }
205
206        Ok(())
207    }
208
209    /// Debug: dump the canonical codes for each symbol
210    #[cfg(test)]
211    pub fn dump_codes(&self, name: &str, lengths: &[u8]) {
212        eprintln!("=== {} Huffman codes ===", name);
213        eprintln!("length_counts[1..=15]: {:?}", &self.length_counts[1..=15]);
214        eprintln!("first_code[1..=15]: {:?}", &self.first_code[1..=15]);
215        eprintln!("first_symbol[1..=15]: {:?}", &self.first_symbol[1..=15]);
216        eprintln!("symbols: {:?}", &self.symbols);
217
218        for (symbol, &len) in lengths.iter().enumerate() {
219            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
220                // Find where this symbol is in the sorted list
221                let first_sym = self.first_symbol[len as usize] as usize;
222                let count = self.length_counts[len as usize] as usize;
223                let end = first_sym + count;
224
225                for i in first_sym..end {
226                    if i < self.symbols.len() && self.symbols[i] == symbol as u16 {
227                        let code = self.first_code[len as usize]
228                            + (i as u32 - self.first_symbol[len as usize] as u32);
229                        // Print code in binary with proper length padding
230                        let code_str: String = format!("{:0width$b}", code, width = len as usize);
231                        eprintln!("  symbol {:>2}: len={}, code={}", symbol, len, code_str);
232                        break;
233                    }
234                }
235            }
236        }
237    }
238
239    /// Get quick table entry for debugging
240    #[cfg(test)]
241    pub fn quick_table_entry(&self, index: usize) -> (u16, u8) {
242        if index < self.quick_table.len() {
243            let entry = &self.quick_table[index];
244            (entry.symbol, entry.length)
245        } else {
246            (0, 0)
247        }
248    }
249
250    /// Dump symbols array for debugging
251    #[cfg(test)]
252    pub fn dump_symbols(&self) -> Vec<u16> {
253        self.symbols.clone()
254    }
255
256    /// Dump first_symbol for debugging
257    #[cfg(test)]
258    pub fn dump_first_symbol(&self) -> Vec<u16> {
259        self.first_symbol[1..6].to_vec()
260    }
261
262    /// Dump decode_len for debugging
263    #[cfg(test)]
264    pub fn dump_decode_len(&self) -> Vec<u32> {
265        self.decode_len[1..8].to_vec()
266    }
267
268    /// Decode a symbol from the bit reader.
269    /// Uses unrar's DecodeNumber algorithm with left-aligned comparisons.
270    #[inline(always)]
271    pub fn decode(&self, reader: &mut BitReader) -> Result<u16> {
272        // Get 16 bits left-aligned
273        let bit_field = reader.peek_bits(16);
274
275        // Quick decode path - check against decode_len (upper limit)
276        // If bit_field < decode_len[10], the code is guaranteed to be ≤10 bits
277        // and have a valid quick table entry
278        if bit_field < self.decode_len[QUICK_BITS as usize] {
279            let code = (bit_field >> (16 - QUICK_BITS)) as usize;
280            // SAFETY: code is always < QUICK_SIZE due to the shift
281            let entry = unsafe { self.quick_table.get_unchecked(code) };
282            // SAFETY: decode_len check guarantees entry.length > 0
283            reader.advance_bits(entry.length as u32);
284            return Ok(entry.symbol);
285        }
286
287        // Slow path: find the matching bit length using unrar's algorithm
288        // SAFETY: decode_len has 16 entries, we access indices 11..15
289        let mut bits = MAX_CODE_LENGTH;
290        for i in (QUICK_BITS as usize + 1)..MAX_CODE_LENGTH {
291            if bit_field < unsafe { *self.decode_len.get_unchecked(i) } {
292                bits = i;
293                break;
294            }
295        }
296
297        // Consume the bits
298        reader.advance_bits(bits as u32);
299
300        // Calculate distance from start of this length's codes
301        // SAFETY: bits is always > 0 in slow path (>= QUICK_BITS+1 = 11)
302        let prev_len = unsafe { *self.decode_len.get_unchecked(bits - 1) };
303        let dist = ((bit_field - prev_len) >> (16 - bits)) as usize;
304
305        // Calculate position in symbol list
306        // SAFETY: first_symbol has 16 entries
307        let pos = unsafe { *self.first_symbol.get_unchecked(bits) } as usize + dist;
308
309        // SAFETY: valid RAR data guarantees pos < symbols.len()
310        // For corrupt data, we clamp to avoid panic
311        if pos >= self.symbols.len() {
312            return Ok(self.symbols.first().copied().unwrap_or(0));
313        }
314
315        Ok(unsafe { *self.symbols.get_unchecked(pos) })
316    }
317}
318
319/// Huffman decoder that can read code lengths from the stream.
320pub struct HuffmanDecoder {
321    /// Main code table (literals + lengths)
322    pub main_table: Option<HuffmanTable>,
323    /// Distance/offset table
324    pub dist_table: Option<HuffmanTable>,
325    /// Low distance table
326    pub low_dist_table: Option<HuffmanTable>,
327    /// Length table
328    pub len_table: Option<HuffmanTable>,
329    /// Old length table for delta encoding (like unrar's UnpOldTable)
330    old_length_table: [u8; HUFFMAN_TABLE_SIZE],
331    /// New length table being built (like unrar's local Table)
332    new_length_table: [u8; HUFFMAN_TABLE_SIZE],
333}
334
335impl HuffmanDecoder {
336    pub fn new() -> Self {
337        Self {
338            main_table: None,
339            dist_table: None,
340            low_dist_table: None,
341            len_table: None,
342            old_length_table: [0; HUFFMAN_TABLE_SIZE],
343            new_length_table: [0; HUFFMAN_TABLE_SIZE],
344        }
345    }
346
347    /// Reset the old length table (like unrar's memset(UnpOldTable,0,...))
348    pub fn reset_tables(&mut self) {
349        self.old_length_table = [0; HUFFMAN_TABLE_SIZE];
350    }
351
352    /// Read code lengths from the bit stream and build tables.
353    /// This matches the RAR3/4 format.
354    pub fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
355        // Read reset flag - if 0, we keep previous length table
356        let reset_tables = reader.read_bit()?;
357        if reset_tables {
358            self.old_length_table = [0; HUFFMAN_TABLE_SIZE];
359        }
360
361        #[cfg(test)]
362        eprintln!(
363            "reset_tables={}, bit_pos={}",
364            reset_tables,
365            reader.bit_position()
366        );
367
368        self.read_tables_inner(reader)
369    }
370
371    /// Read tables after header bits have been consumed.
372    pub fn read_tables_after_header(&mut self, reader: &mut BitReader) -> Result<()> {
373        self.read_tables_inner(reader)
374    }
375
376    /// Internal table reading.
377    fn read_tables_inner(&mut self, reader: &mut BitReader) -> Result<()> {
378        // Read bit lengths for the precode (20 symbols, 4 bits each)
379        let mut precode_lengths = [0u8; 20];
380        let mut i = 0;
381
382        #[cfg(test)]
383        let precode_start_bit = reader.bit_position();
384
385        #[cfg(test)]
386        if precode_start_bit > 100000 {
387            eprintln!("precode reader state: {}", reader.debug_state());
388        }
389
390        while i < 20 {
391            let len = reader.read_bits(4)? as u8;
392
393            if len == 0x0F {
394                // Special case: 0x0F could be length 15 or a zero run indicator
395                let zero_count = reader.read_bits(4)? as usize;
396                #[cfg(test)]
397                {
398                    if precode_start_bit > 100000 {
399                        eprintln!("  PRECODE[{}]: len=0x0F, zero_count={}", i, zero_count);
400                    }
401                }
402                if zero_count == 0 {
403                    // ZeroCount 0 means length is actually 15
404                    precode_lengths[i] = 15;
405                    i += 1;
406                } else {
407                    // ZeroCount > 0 means insert (ZeroCount + 2) zeros
408                    let fill_count = (zero_count + 2).min(20 - i);
409                    #[cfg(test)]
410                    {
411                        if precode_start_bit > 100000 {
412                            eprintln!(
413                                "    filling {} zeros (i={}..{})",
414                                fill_count,
415                                i,
416                                i + fill_count
417                            );
418                        }
419                    }
420                    for _ in 0..fill_count {
421                        precode_lengths[i] = 0;
422                        i += 1;
423                    }
424                }
425                continue;
426            }
427            precode_lengths[i] = len;
428            i += 1;
429        }
430
431        #[cfg(test)]
432        eprintln!(
433            "precode at bits {}..{}: {:?}",
434            precode_start_bit,
435            reader.bit_position(),
436            precode_lengths
437        );
438
439        #[cfg(test)]
440        if precode_start_bit > 100000 {
441            // Show old length_table values before reading new table
442            eprintln!(
443                "OLD length_table[0..30]: {:?}",
444                &self.old_length_table[0..30]
445            );
446            // Show OLD low_dist portion
447            let low_offset = MAINCODE_SIZE + OFFSETCODE_SIZE;
448            eprintln!(
449                "OLD length_table low_dist [{}..{}]: {:?}",
450                low_offset,
451                low_offset + LOWOFFSETCODE_SIZE,
452                &self.old_length_table[low_offset..low_offset + LOWOFFSETCODE_SIZE]
453            );
454            // Show reader state after precode
455            eprintln!("After precode: {}", reader.debug_state());
456            eprintln!("  peek_bytes(8): {:02x?}", reader.peek_bytes(8));
457        }
458
459        let precode_table = HuffmanTable::new(&precode_lengths)?;
460
461        #[cfg(test)]
462        if precode_start_bit > 100000 {
463            precode_table.dump_codes("SECOND precode", &precode_lengths);
464            // Show bits at start of main table reading
465            eprintln!(
466                "Bits at start of main table: {:016b} (peek 16) at bit_pos {}",
467                reader.peek_bits(16),
468                reader.bit_position()
469            );
470        }
471
472        // Read main length table using precode
473        // Like unrar: write to new_length_table, read old values from old_length_table
474        i = 0;
475        #[cfg(test)]
476        let mut sym_count = 0;
477        while i < HUFFMAN_TABLE_SIZE {
478            #[cfg(test)]
479            {
480                if precode_start_bit > 6060000 && precode_start_bit < 6065000 {
481                    // Debug EVERY symbol decode for this specific table
482                    if i >= 295 && i <= 365 {
483                        eprintln!(
484                            "DEBUG[{}]: bit_pos={}, peek16={:016b}",
485                            i,
486                            reader.bit_position(),
487                            reader.peek_bits(16)
488                        );
489                    }
490                }
491            }
492            let sym = precode_table.decode(reader)?;
493
494            #[cfg(test)]
495            {
496                if sym_count < 30 {
497                    eprint!("sym[{}]={} ", i, sym);
498                    sym_count += 1;
499                }
500                // For the problematic table, show every symbol
501                if precode_start_bit > 6060000
502                    && precode_start_bit < 6065000
503                    && i >= 295
504                    && i <= 365
505                {
506                    eprintln!("  sym at [{}] = {}", i, sym);
507                }
508            }
509
510            if sym < 16 {
511                // Add sym to old value at this position (mod 16)
512                // Read from old_length_table, write to new_length_table
513                let old_val = self.old_length_table[i];
514                self.new_length_table[i] = (old_val + sym as u8) & 0x0F;
515                #[cfg(test)]
516                {
517                    if precode_start_bit > 100000 && sym_count < 30 {
518                        eprintln!(
519                            " old={} sym={} new={}",
520                            old_val, sym, self.new_length_table[i]
521                        );
522                    }
523                }
524                i += 1;
525            } else if sym == 16 {
526                // Repeat previous length, count = 3 + 3bits
527                // Read from new_length_table (previous NEW value, like unrar's Table[I-1])
528                if i == 0 {
529                    return Err(DecompressError::InvalidHuffmanCode);
530                }
531                let count = 3 + reader.read_bits(3)? as usize;
532                let prev = self.new_length_table[i - 1];
533                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
534                    self.new_length_table[i] = prev;
535                    i += 1;
536                }
537            } else if sym == 17 {
538                // Repeat previous length, count = 11 + 7bits
539                if i == 0 {
540                    return Err(DecompressError::InvalidHuffmanCode);
541                }
542                let count = 11 + reader.read_bits(7)? as usize;
543                let prev = self.new_length_table[i - 1];
544                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
545                    self.new_length_table[i] = prev;
546                    i += 1;
547                }
548            } else if sym == 18 {
549                // Insert zeros, count = 3 + 3bits
550                let count_bits = reader.read_bits(3)? as usize;
551                let count = 3 + count_bits;
552                #[cfg(test)]
553                {
554                    if precode_start_bit > 100000 {
555                        eprintln!(
556                            " sym18: count_bits={} count={} filling i={}..{}",
557                            count_bits,
558                            count,
559                            i,
560                            i + count
561                        );
562                    }
563                }
564                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
565                    self.new_length_table[i] = 0;
566                    i += 1;
567                }
568            } else {
569                // sym == 19: Insert zeros, count = 11 + 7bits
570                let count = 11 + reader.read_bits(7)? as usize;
571                #[cfg(test)]
572                {
573                    if precode_start_bit > 6060000 && precode_start_bit < 6065000 {
574                        eprintln!(" sym19: count={} filling i={}..{}", count, i, i + count);
575                    }
576                }
577                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
578                    self.new_length_table[i] = 0;
579                    i += 1;
580                }
581            }
582            #[cfg(test)]
583            {
584                // Track when we reach low_dist region
585                let low_offset = MAINCODE_SIZE + OFFSETCODE_SIZE;
586                if precode_start_bit > 6060000
587                    && precode_start_bit < 6065000
588                    && i >= low_offset
589                    && i < low_offset + 5
590                {
591                    eprintln!(
592                        "  -> entering low_dist region at i={}, new_length_table[{}]={}",
593                        i, i, self.new_length_table[i]
594                    );
595                }
596            }
597        }
598
599        // Copy new table to old table for next table read (like unrar's memcpy)
600        self.old_length_table
601            .copy_from_slice(&self.new_length_table);
602
603        #[cfg(test)]
604        eprintln!();
605
606        #[cfg(test)]
607        eprintln!(
608            "new_length_table first 20: {:?}",
609            &self.new_length_table[..20]
610        );
611
612        #[cfg(test)]
613        if precode_start_bit > 100000 {
614            eprintln!(
615                "SECOND TABLE new_length_table[0..50]: {:?}",
616                &self.new_length_table[0..50]
617            );
618            // Show low_dist portion (offset 359..376)
619            let low_offset = MAINCODE_SIZE + OFFSETCODE_SIZE;
620            eprintln!(
621                "new_length_table low_dist portion [{}..{}]: {:?}",
622                low_offset,
623                low_offset + LOWOFFSETCODE_SIZE,
624                &self.new_length_table[low_offset..low_offset + LOWOFFSETCODE_SIZE]
625            );
626        }
627
628        // Build the four Huffman tables from new_length_table
629        // Use rebuild() if table exists to avoid allocation
630        let mut offset = 0;
631
632        let main_lengths = &self.new_length_table[offset..offset + MAINCODE_SIZE];
633        if let Some(ref mut table) = self.main_table {
634            table.rebuild(main_lengths)?;
635        } else {
636            self.main_table = Some(HuffmanTable::new(main_lengths)?);
637        }
638
639        #[cfg(test)]
640        {
641            // Debug: print codes for symbols we care about
642            let table = self.main_table.as_ref().unwrap();
643            for &sym in &[45u16, 71, 75, 89, 107, 185, 196, 256, 275] {
644                let len = main_lengths.get(sym as usize).copied().unwrap_or(0);
645                if len > 0 {
646                    // Find symbol position in sorted list
647                    let first_sym = table.first_symbol[len as usize];
648                    for i in first_sym..first_sym + table.length_counts[len as usize] {
649                        if table.symbols[i as usize] == sym {
650                            let code =
651                                table.first_code[len as usize] + (i as u32 - first_sym as u32);
652                            let bp = reader.bit_position();
653                            if bp > 6140000 && bp < 6290000 {
654                                eprintln!(
655                                    "main_table at {} symbol {}: len={}, code={:0width$b}",
656                                    bp,
657                                    sym,
658                                    len,
659                                    code,
660                                    width = len as usize
661                                );
662                            }
663                            break;
664                        }
665                    }
666                } else {
667                    let bp = reader.bit_position();
668                    if bp > 6140000 && bp < 6290000 {
669                        eprintln!("main_table at {} symbol {}: NOT IN TABLE", bp, sym);
670                    }
671                }
672            }
673        }
674
675        offset += MAINCODE_SIZE;
676
677        let dist_lengths = &self.new_length_table[offset..offset + OFFSETCODE_SIZE];
678        #[cfg(test)]
679        {
680            let bp = reader.bit_position();
681            if bp > 6140000 && bp < 6145000 {
682                eprintln!(
683                    "dist_table at bit_pos={} FULL lengths: {:?}",
684                    bp, dist_lengths
685                );
686            }
687        }
688        if let Some(ref mut table) = self.dist_table {
689            table.rebuild(dist_lengths)?;
690        } else {
691            self.dist_table = Some(HuffmanTable::new(dist_lengths)?);
692        }
693        offset += OFFSETCODE_SIZE;
694
695        #[cfg(test)]
696        {
697            let low_lengths = &self.new_length_table[offset..offset + LOWOFFSETCODE_SIZE];
698            eprintln!(
699                "low_dist_table lengths at bit_pos={}: {:?}",
700                reader.bit_position(),
701                low_lengths
702            );
703        }
704
705        let low_lengths = &self.new_length_table[offset..offset + LOWOFFSETCODE_SIZE];
706        if let Some(ref mut table) = self.low_dist_table {
707            table.rebuild(low_lengths)?;
708        } else {
709            self.low_dist_table = Some(HuffmanTable::new(low_lengths)?);
710        }
711
712        #[cfg(test)]
713        {
714            let low_lengths = &self.new_length_table[offset..offset + LOWOFFSETCODE_SIZE];
715            self.low_dist_table
716                .as_ref()
717                .unwrap()
718                .dump_codes("low_dist", low_lengths);
719            // Dump symbols array
720            eprintln!(
721                "low_dist symbols: {:?}",
722                self.low_dist_table.as_ref().unwrap().dump_symbols()
723            );
724            // Dump first_symbol
725            eprintln!(
726                "low_dist first_symbol[1..6]: {:?}",
727                self.low_dist_table.as_ref().unwrap().dump_first_symbol()
728            );
729            // Dump quick_table entries 512-575 (symbol 8's range)
730            eprintln!(
731                "low_dist quick_table[512..520]: {:?}",
732                (512..520)
733                    .map(|i| self.low_dist_table.as_ref().unwrap().quick_table_entry(i))
734                    .collect::<Vec<_>>()
735            );
736            eprintln!(
737                "low_dist quick_table[568]: {:?}",
738                self.low_dist_table.as_ref().unwrap().quick_table_entry(568)
739            );
740        }
741
742        offset += LOWOFFSETCODE_SIZE;
743
744        let len_lengths = &self.new_length_table[offset..offset + LENGTHCODE_SIZE];
745        #[cfg(test)]
746        {
747            eprintln!("len_table lengths[0..5]: {:?}", &len_lengths[0..5]);
748        }
749        if let Some(ref mut table) = self.len_table {
750            table.rebuild(len_lengths)?;
751        } else {
752            self.len_table = Some(HuffmanTable::new(len_lengths)?);
753        }
754
755        #[cfg(test)]
756        eprintln!("table reading done at bit_pos={}", reader.bit_position());
757
758        Ok(())
759    }
760}
761
762impl Default for HuffmanDecoder {
763    fn default() -> Self {
764        Self::new()
765    }
766}
767
768#[cfg(test)]
769mod tests {
770    use super::*;
771
772    #[test]
773    fn test_huffman_table_simple() {
774        // Simple table: 2 symbols with lengths [1, 1]
775        // Symbol 0 = code 0, Symbol 1 = code 1
776        let lengths = [1u8, 1];
777        let table = HuffmanTable::new(&lengths).unwrap();
778
779        let data = [0b10000000]; // First bit is 1 -> symbol 1
780        let mut reader = BitReader::new(&data);
781        assert_eq!(table.decode(&mut reader).unwrap(), 1);
782    }
783
784    #[test]
785    fn test_huffman_table_varying_lengths() {
786        // Symbol 0: length 1, code 0
787        // Symbol 1: length 2, code 10
788        // Symbol 2: length 2, code 11
789        let lengths = [1u8, 2, 2];
790        let table = HuffmanTable::new(&lengths).unwrap();
791
792        let data = [0b01011000]; // 0 (sym 0), 10 (sym 1), 11 (sym 2)
793        let mut reader = BitReader::new(&data);
794
795        assert_eq!(table.decode(&mut reader).unwrap(), 0);
796        assert_eq!(table.decode(&mut reader).unwrap(), 1);
797        assert_eq!(table.decode(&mut reader).unwrap(), 2);
798    }
799
800    #[test]
801    fn test_huffman_table_low_dist_all_4bit() {
802        // All symbols 0-15 have length 4, symbol 16 has length 0
803        // This is the typical low_dist table pattern
804        let lengths: Vec<u8> = vec![4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0];
805        let table = HuffmanTable::new(&lengths).unwrap();
806
807        // For all-4-bit codes:
808        // Symbol 0: code 0000
809        // Symbol 1: code 0001
810        // ...
811        // Symbol 8: code 1000
812        // ...
813        // Symbol 15: code 1111
814
815        // Check quick_table for index 568 (which is 1000111000 in 10 bits)
816        // First 4 bits = 1000 = 8, so symbol should be 8
817        let (sym, len) = table.quick_table_entry(568);
818        eprintln!("quick_table[568] = (sym={}, len={})", sym, len);
819        assert_eq!(sym, 8, "Symbol at quick_table[568] should be 8");
820        assert_eq!(len, 4, "Length at quick_table[568] should be 4");
821
822        // Check all entries from 512-575 (where first 4 bits = 1000)
823        for i in 512..576 {
824            let (s, l) = table.quick_table_entry(i);
825            assert_eq!(s, 8, "Symbol at quick_table[{}] should be 8, got {}", i, s);
826            assert_eq!(l, 4, "Length at quick_table[{}] should be 4, got {}", i, l);
827        }
828
829        // Test actual decoding
830        // Bits 1000... should decode to symbol 8
831        let data = [0b10000000, 0b00000000];
832        let mut reader = BitReader::new(&data);
833        assert_eq!(table.decode(&mut reader).unwrap(), 8);
834        assert_eq!(reader.bit_position(), 4); // Should consume exactly 4 bits
835    }
836
837    #[test]
838    fn test_huffman_table_rebuild_changes() {
839        // Start with a table that has different lengths
840        // This tests that rebuild() properly clears and rebuilds the quick_table
841        let initial_lengths: Vec<u8> = vec![4, 4, 5, 4, 3, 5, 4, 4, 4, 4, 4, 5, 4, 3, 5, 4, 0];
842        let mut table = HuffmanTable::new(&initial_lengths).unwrap();
843
844        // Now rebuild with all 4-bit codes
845        let new_lengths: Vec<u8> = vec![4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0];
846        table.rebuild(&new_lengths).unwrap();
847
848        // Check quick_table for index 568
849        let (sym, len) = table.quick_table_entry(568);
850        eprintln!(
851            "After rebuild: quick_table[568] = (sym={}, len={})",
852            sym, len
853        );
854        assert_eq!(
855            sym, 8,
856            "Symbol at quick_table[568] should be 8 after rebuild"
857        );
858        assert_eq!(
859            len, 4,
860            "Length at quick_table[568] should be 4 after rebuild"
861        );
862
863        // Check all entries from 512-575 (where first 4 bits = 1000)
864        for i in 512..576 {
865            let (s, l) = table.quick_table_entry(i);
866            assert_eq!(
867                s, 8,
868                "Symbol at quick_table[{}] should be 8 after rebuild, got {}",
869                i, s
870            );
871            assert_eq!(
872                l, 4,
873                "Length at quick_table[{}] should be 4 after rebuild, got {}",
874                i, l
875            );
876        }
877
878        // Test actual decoding
879        let data = [0b10000000, 0b00000000];
880        let mut reader = BitReader::new(&data);
881        assert_eq!(table.decode(&mut reader).unwrap(), 8);
882        assert_eq!(reader.bit_position(), 4);
883    }
884}