Skip to main content

rar_stream/decompress/
huffman.rs

1//! Huffman decoder for RAR compression.
2//!
3//! RAR uses canonical Huffman codes with up to 15-bit code lengths.
4
5use super::{BitReader, DecompressError, Result};
6
7/// Maximum code length in bits.
8pub const MAX_CODE_LENGTH: usize = 15;
9
10/// Table sizes for RAR3/4 format.
11pub const MAINCODE_SIZE: usize = 299;
12pub const OFFSETCODE_SIZE: usize = 60;
13pub const LOWOFFSETCODE_SIZE: usize = 17;
14pub const LENGTHCODE_SIZE: usize = 28;
15pub const HUFFMAN_TABLE_SIZE: usize =
16    MAINCODE_SIZE + OFFSETCODE_SIZE + LOWOFFSETCODE_SIZE + LENGTHCODE_SIZE;
17
18/// Huffman decoding table entry.
19#[derive(Clone, Copy, Default)]
20pub struct HuffmanEntry {
21    /// Symbol value
22    pub symbol: u16,
23    /// Code length in bits
24    pub length: u8,
25}
26
27/// Huffman decoding table.
28/// Uses a lookup table for fast decoding of short codes.
29pub struct HuffmanTable {
30    /// Quick lookup table for codes up to QUICK_BITS
31    quick_table: Vec<HuffmanEntry>,
32    /// Sorted symbols for longer codes
33    symbols: Vec<u16>,
34    /// Code length counts
35    length_counts: [u16; MAX_CODE_LENGTH + 1],
36    /// First code value for each length (right-aligned, canonical)
37    first_code: [u32; MAX_CODE_LENGTH + 1],
38    /// First symbol index for each length (same as unrar's DecodePos)
39    first_symbol: [u16; MAX_CODE_LENGTH + 1],
40    /// Left-aligned upper limit for each length (unrar's DecodeLen)
41    decode_len: [u32; MAX_CODE_LENGTH + 1],
42}
43
44/// Bits for quick lookup table.
45const QUICK_BITS: u32 = 10;
46const QUICK_SIZE: usize = 1 << QUICK_BITS;
47
48impl HuffmanTable {
49    /// Create a new Huffman table from code lengths.
50    pub fn new(lengths: &[u8]) -> Result<Self> {
51        let mut table = Self {
52            quick_table: vec![HuffmanEntry::default(); QUICK_SIZE],
53            symbols: vec![0; lengths.len()],
54            length_counts: [0; MAX_CODE_LENGTH + 1],
55            first_code: [0; MAX_CODE_LENGTH + 1],
56            first_symbol: [0; MAX_CODE_LENGTH + 1],
57            decode_len: [0; MAX_CODE_LENGTH + 1],
58        };
59
60        // Count code lengths
61        for &len in lengths {
62            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
63                table.length_counts[len as usize] += 1;
64            }
65        }
66
67        // Calculate first code for each length (canonical Huffman)
68        // AND decode_len (left-aligned upper limit, like unrar)
69        let mut code = 0u32;
70        let mut upper_limit = 0u32;
71        for i in 1..=MAX_CODE_LENGTH {
72            code = (code + table.length_counts[i - 1] as u32) << 1;
73            table.first_code[i] = code;
74
75            // unrar's DecodeLen calculation
76            upper_limit += table.length_counts[i] as u32;
77            table.decode_len[i] = upper_limit << (16 - i);
78            upper_limit *= 2;
79        }
80
81        // Calculate first symbol index for each length (same as unrar's DecodePos)
82        let mut idx = 0u16;
83        for i in 1..=MAX_CODE_LENGTH {
84            table.first_symbol[i] = idx;
85            idx += table.length_counts[i];
86        }
87
88        // Build symbol list sorted by code (unrar's DecodeNum)
89        let mut indices = table.first_symbol;
90        for (symbol, &len) in lengths.iter().enumerate() {
91            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
92                let i = indices[len as usize] as usize;
93                if i < table.symbols.len() {
94                    table.symbols[i] = symbol as u16;
95                    indices[len as usize] += 1;
96                }
97            }
98        }
99
100        // Build quick lookup table by iterating symbols (avoids O(n²) search)
101        for len in 1..=QUICK_BITS as usize {
102            let start_idx = table.first_symbol[len] as usize;
103            let end_idx = table.first_symbol[len + 1] as usize;
104
105            for (i, &symbol) in table.symbols[start_idx..end_idx].iter().enumerate() {
106                let code = table.first_code[len] + i as u32;
107                let fill_bits = QUICK_BITS - len as u32;
108                let start = (code << fill_bits) as usize;
109                let count = 1usize << fill_bits;
110
111                for j in 0..count {
112                    let entry_idx = start + j;
113                    if entry_idx < QUICK_SIZE {
114                        table.quick_table[entry_idx] = HuffmanEntry {
115                            symbol,
116                            length: len as u8,
117                        };
118                    }
119                }
120            }
121        }
122
123        Ok(table)
124    }
125
126    /// Rebuild the table from new code lengths, reusing existing allocations.
127    pub fn rebuild(&mut self, lengths: &[u8]) -> Result<()> {
128        // Resize symbols vec if needed (no fill - we overwrite valid entries)
129        if self.symbols.len() != lengths.len() {
130            self.symbols.resize(lengths.len(), 0);
131        }
132
133        // Reset array fields
134        self.length_counts = [0; MAX_CODE_LENGTH + 1];
135        self.first_code = [0; MAX_CODE_LENGTH + 1];
136        self.first_symbol = [0; MAX_CODE_LENGTH + 1];
137        self.decode_len = [0; MAX_CODE_LENGTH + 1];
138
139        // Count code lengths
140        for &len in lengths {
141            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
142                self.length_counts[len as usize] += 1;
143            }
144        }
145
146        // Calculate first code for each length (canonical Huffman)
147        // AND decode_len (left-aligned upper limit, like unrar)
148        let mut code = 0u32;
149        let mut upper_limit = 0u32;
150        for i in 1..=MAX_CODE_LENGTH {
151            code = (code + self.length_counts[i - 1] as u32) << 1;
152            self.first_code[i] = code;
153
154            // unrar's DecodeLen calculation
155            upper_limit += self.length_counts[i] as u32;
156            self.decode_len[i] = upper_limit << (16 - i);
157            upper_limit *= 2;
158        }
159
160        // Calculate first symbol index for each length
161        let mut idx = 0u16;
162        for i in 1..=MAX_CODE_LENGTH {
163            self.first_symbol[i] = idx;
164            idx += self.length_counts[i];
165        }
166
167        // Build symbol list sorted by code
168        let mut indices = self.first_symbol;
169        for (symbol, &len) in lengths.iter().enumerate() {
170            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
171                let i = indices[len as usize] as usize;
172                if i < self.symbols.len() {
173                    self.symbols[i] = symbol as u16;
174                    indices[len as usize] += 1;
175                }
176            }
177        }
178
179        // Rebuild quick lookup table - MUST clear to avoid stale entries
180        self.quick_table.fill(HuffmanEntry::default());
181
182        // Build quick table by iterating through symbols list (already sorted by code)
183        // This avoids O(n²) linear search for each symbol
184        for len in 1..=QUICK_BITS as usize {
185            let start_idx = self.first_symbol[len] as usize;
186            let end_idx = self.first_symbol[len + 1] as usize;
187
188            for (i, &symbol) in self.symbols[start_idx..end_idx].iter().enumerate() {
189                let code = self.first_code[len] + i as u32;
190                let fill_bits = QUICK_BITS - len as u32;
191                let start = (code << fill_bits) as usize;
192                let count = 1usize << fill_bits;
193
194                for j in 0..count {
195                    let entry_idx = start + j;
196                    if entry_idx < QUICK_SIZE {
197                        self.quick_table[entry_idx] = HuffmanEntry {
198                            symbol,
199                            length: len as u8,
200                        };
201                    }
202                }
203            }
204        }
205
206        Ok(())
207    }
208
209    /// Debug: dump the canonical codes for each symbol
210    #[cfg(test)]
211    pub fn dump_codes(&self, name: &str, lengths: &[u8]) {
212        eprintln!("=== {} Huffman codes ===", name);
213        eprintln!("length_counts[1..=15]: {:?}", &self.length_counts[1..=15]);
214        eprintln!("first_code[1..=15]: {:?}", &self.first_code[1..=15]);
215        eprintln!("first_symbol[1..=15]: {:?}", &self.first_symbol[1..=15]);
216        eprintln!("symbols: {:?}", &self.symbols);
217
218        for (symbol, &len) in lengths.iter().enumerate() {
219            if len > 0 && (len as usize) <= MAX_CODE_LENGTH {
220                // Find where this symbol is in the sorted list
221                let first_sym = self.first_symbol[len as usize] as usize;
222                let count = self.length_counts[len as usize] as usize;
223                let end = first_sym + count;
224
225                for i in first_sym..end {
226                    if i < self.symbols.len() && self.symbols[i] == symbol as u16 {
227                        let code = self.first_code[len as usize]
228                            + (i as u32 - self.first_symbol[len as usize] as u32);
229                        // Print code in binary with proper length padding
230                        let code_str: String = format!("{:0width$b}", code, width = len as usize);
231                        eprintln!("  symbol {:>2}: len={}, code={}", symbol, len, code_str);
232                        break;
233                    }
234                }
235            }
236        }
237    }
238
239    /// Get quick table entry for debugging
240    #[cfg(test)]
241    pub fn quick_table_entry(&self, index: usize) -> (u16, u8) {
242        if index < self.quick_table.len() {
243            let entry = &self.quick_table[index];
244            (entry.symbol, entry.length)
245        } else {
246            (0, 0)
247        }
248    }
249
250    /// Dump symbols array for debugging
251    #[cfg(test)]
252    pub fn dump_symbols(&self) -> Vec<u16> {
253        self.symbols.clone()
254    }
255
256    /// Dump first_symbol for debugging
257    #[cfg(test)]
258    pub fn dump_first_symbol(&self) -> Vec<u16> {
259        self.first_symbol[1..6].to_vec()
260    }
261
262    /// Dump decode_len for debugging
263    #[cfg(test)]
264    pub fn dump_decode_len(&self) -> Vec<u32> {
265        self.decode_len[1..8].to_vec()
266    }
267
268    /// Decode a symbol from the bit reader.
269    /// Uses unrar's DecodeNumber algorithm with left-aligned comparisons.
270    #[inline(always)]
271    pub fn decode(&self, reader: &mut BitReader) -> Result<u16> {
272        // Get 16 bits left-aligned
273        let bit_field = reader.peek_bits(16);
274
275        // Quick decode path - check against decode_len (upper limit)
276        // If bit_field < decode_len[10], the code is guaranteed to be ≤10 bits
277        // and have a valid quick table entry
278        if bit_field < self.decode_len[QUICK_BITS as usize] {
279            let code = (bit_field >> (16 - QUICK_BITS)) as usize;
280            // SAFETY: code is always < QUICK_SIZE due to the shift
281            let entry = unsafe { self.quick_table.get_unchecked(code) };
282            // SAFETY: decode_len check guarantees entry.length > 0
283            reader.advance_bits(entry.length as u32);
284            return Ok(entry.symbol);
285        }
286
287        // Slow path: find matching bit length (codes 11-15 bits).
288        // Unrolled for branch-predictor friendliness.
289        // SAFETY: decode_len has 16 entries, we access indices 11..15
290        let bits;
291        unsafe {
292            if bit_field < *self.decode_len.get_unchecked(11) {
293                bits = 11;
294            } else if bit_field < *self.decode_len.get_unchecked(12) {
295                bits = 12;
296            } else if bit_field < *self.decode_len.get_unchecked(13) {
297                bits = 13;
298            } else if bit_field < *self.decode_len.get_unchecked(14) {
299                bits = 14;
300            } else {
301                bits = MAX_CODE_LENGTH;
302            }
303        }
304
305        // Consume the bits
306        reader.advance_bits(bits as u32);
307
308        // Calculate distance from start of this length's codes
309        // SAFETY: bits is always > 0 in slow path (>= QUICK_BITS+1 = 11)
310        let prev_len = unsafe { *self.decode_len.get_unchecked(bits - 1) };
311        let dist = ((bit_field - prev_len) >> (16 - bits)) as usize;
312
313        // Calculate position in symbol list
314        // SAFETY: first_symbol has 16 entries
315        let pos = unsafe { *self.first_symbol.get_unchecked(bits) } as usize + dist;
316
317        // SAFETY: valid RAR data guarantees pos < symbols.len()
318        // For corrupt data, we clamp to avoid panic
319        if pos >= self.symbols.len() {
320            return Ok(self.symbols.first().copied().unwrap_or(0));
321        }
322
323        Ok(unsafe { *self.symbols.get_unchecked(pos) })
324    }
325}
326
327/// Huffman decoder that can read code lengths from the stream.
328pub struct HuffmanDecoder {
329    /// Main code table (literals + lengths)
330    pub main_table: Option<HuffmanTable>,
331    /// Distance/offset table
332    pub dist_table: Option<HuffmanTable>,
333    /// Low distance table
334    pub low_dist_table: Option<HuffmanTable>,
335    /// Length table
336    pub len_table: Option<HuffmanTable>,
337    /// Old length table for delta encoding (like unrar's UnpOldTable)
338    old_length_table: [u8; HUFFMAN_TABLE_SIZE],
339    /// New length table being built (like unrar's local Table)
340    new_length_table: [u8; HUFFMAN_TABLE_SIZE],
341}
342
343impl HuffmanDecoder {
344    pub fn new() -> Self {
345        Self {
346            main_table: None,
347            dist_table: None,
348            low_dist_table: None,
349            len_table: None,
350            old_length_table: [0; HUFFMAN_TABLE_SIZE],
351            new_length_table: [0; HUFFMAN_TABLE_SIZE],
352        }
353    }
354
355    /// Reset the old length table (like unrar's memset(UnpOldTable,0,...))
356    pub fn reset_tables(&mut self) {
357        self.old_length_table = [0; HUFFMAN_TABLE_SIZE];
358    }
359
360    /// Read code lengths from the bit stream and build tables.
361    /// This matches the RAR3/4 format.
362    pub fn read_tables(&mut self, reader: &mut BitReader) -> Result<()> {
363        // Read reset flag - if 0, we keep previous length table
364        let reset_tables = reader.read_bit()?;
365        if reset_tables {
366            self.old_length_table = [0; HUFFMAN_TABLE_SIZE];
367        }
368
369        #[cfg(test)]
370        eprintln!(
371            "reset_tables={}, bit_pos={}",
372            reset_tables,
373            reader.bit_position()
374        );
375
376        self.read_tables_inner(reader)
377    }
378
379    /// Read tables after header bits have been consumed.
380    pub fn read_tables_after_header(&mut self, reader: &mut BitReader) -> Result<()> {
381        self.read_tables_inner(reader)
382    }
383
384    /// Internal table reading.
385    fn read_tables_inner(&mut self, reader: &mut BitReader) -> Result<()> {
386        // Read bit lengths for the precode (20 symbols, 4 bits each)
387        let mut precode_lengths = [0u8; 20];
388        let mut i = 0;
389
390        #[cfg(test)]
391        let precode_start_bit = reader.bit_position();
392
393        #[cfg(test)]
394        if precode_start_bit > 100000 {
395            eprintln!("precode reader state: {}", reader.debug_state());
396        }
397
398        while i < 20 {
399            let len = reader.read_bits(4)? as u8;
400
401            if len == 0x0F {
402                // Special case: 0x0F could be length 15 or a zero run indicator
403                let zero_count = reader.read_bits(4)? as usize;
404                #[cfg(test)]
405                {
406                    if precode_start_bit > 100000 {
407                        eprintln!("  PRECODE[{}]: len=0x0F, zero_count={}", i, zero_count);
408                    }
409                }
410                if zero_count == 0 {
411                    // ZeroCount 0 means length is actually 15
412                    precode_lengths[i] = 15;
413                    i += 1;
414                } else {
415                    // ZeroCount > 0 means insert (ZeroCount + 2) zeros
416                    let fill_count = (zero_count + 2).min(20 - i);
417                    #[cfg(test)]
418                    {
419                        if precode_start_bit > 100000 {
420                            eprintln!(
421                                "    filling {} zeros (i={}..{})",
422                                fill_count,
423                                i,
424                                i + fill_count
425                            );
426                        }
427                    }
428                    for _ in 0..fill_count {
429                        precode_lengths[i] = 0;
430                        i += 1;
431                    }
432                }
433                continue;
434            }
435            precode_lengths[i] = len;
436            i += 1;
437        }
438
439        #[cfg(test)]
440        eprintln!(
441            "precode at bits {}..{}: {:?}",
442            precode_start_bit,
443            reader.bit_position(),
444            precode_lengths
445        );
446
447        #[cfg(test)]
448        if precode_start_bit > 100000 {
449            // Show old length_table values before reading new table
450            eprintln!(
451                "OLD length_table[0..30]: {:?}",
452                &self.old_length_table[0..30]
453            );
454            // Show OLD low_dist portion
455            let low_offset = MAINCODE_SIZE + OFFSETCODE_SIZE;
456            eprintln!(
457                "OLD length_table low_dist [{}..{}]: {:?}",
458                low_offset,
459                low_offset + LOWOFFSETCODE_SIZE,
460                &self.old_length_table[low_offset..low_offset + LOWOFFSETCODE_SIZE]
461            );
462            // Show reader state after precode
463            eprintln!("After precode: {}", reader.debug_state());
464            eprintln!("  peek_bytes(8): {:02x?}", reader.peek_bytes(8));
465        }
466
467        let precode_table = HuffmanTable::new(&precode_lengths)?;
468
469        #[cfg(test)]
470        if precode_start_bit > 100000 {
471            precode_table.dump_codes("SECOND precode", &precode_lengths);
472            // Show bits at start of main table reading
473            eprintln!(
474                "Bits at start of main table: {:016b} (peek 16) at bit_pos {}",
475                reader.peek_bits(16),
476                reader.bit_position()
477            );
478        }
479
480        // Read main length table using precode
481        // Like unrar: write to new_length_table, read old values from old_length_table
482        i = 0;
483        #[cfg(test)]
484        let mut sym_count = 0;
485        while i < HUFFMAN_TABLE_SIZE {
486            #[cfg(test)]
487            {
488                if precode_start_bit > 6060000 && precode_start_bit < 6065000 {
489                    // Debug EVERY symbol decode for this specific table
490                    if i >= 295 && i <= 365 {
491                        eprintln!(
492                            "DEBUG[{}]: bit_pos={}, peek16={:016b}",
493                            i,
494                            reader.bit_position(),
495                            reader.peek_bits(16)
496                        );
497                    }
498                }
499            }
500            let sym = precode_table.decode(reader)?;
501
502            #[cfg(test)]
503            {
504                if sym_count < 30 {
505                    eprint!("sym[{}]={} ", i, sym);
506                    sym_count += 1;
507                }
508                // For the problematic table, show every symbol
509                if precode_start_bit > 6060000
510                    && precode_start_bit < 6065000
511                    && i >= 295
512                    && i <= 365
513                {
514                    eprintln!("  sym at [{}] = {}", i, sym);
515                }
516            }
517
518            if sym < 16 {
519                // Add sym to old value at this position (mod 16)
520                // Read from old_length_table, write to new_length_table
521                let old_val = self.old_length_table[i];
522                self.new_length_table[i] = (old_val + sym as u8) & 0x0F;
523                #[cfg(test)]
524                {
525                    if precode_start_bit > 100000 && sym_count < 30 {
526                        eprintln!(
527                            " old={} sym={} new={}",
528                            old_val, sym, self.new_length_table[i]
529                        );
530                    }
531                }
532                i += 1;
533            } else if sym == 16 {
534                // Repeat previous length, count = 3 + 3bits
535                // Read from new_length_table (previous NEW value, like unrar's Table[I-1])
536                if i == 0 {
537                    return Err(DecompressError::InvalidHuffmanCode);
538                }
539                let count = 3 + reader.read_bits(3)? as usize;
540                let prev = self.new_length_table[i - 1];
541                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
542                    self.new_length_table[i] = prev;
543                    i += 1;
544                }
545            } else if sym == 17 {
546                // Repeat previous length, count = 11 + 7bits
547                if i == 0 {
548                    return Err(DecompressError::InvalidHuffmanCode);
549                }
550                let count = 11 + reader.read_bits(7)? as usize;
551                let prev = self.new_length_table[i - 1];
552                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
553                    self.new_length_table[i] = prev;
554                    i += 1;
555                }
556            } else if sym == 18 {
557                // Insert zeros, count = 3 + 3bits
558                let count_bits = reader.read_bits(3)? as usize;
559                let count = 3 + count_bits;
560                #[cfg(test)]
561                {
562                    if precode_start_bit > 100000 {
563                        eprintln!(
564                            " sym18: count_bits={} count={} filling i={}..{}",
565                            count_bits,
566                            count,
567                            i,
568                            i + count
569                        );
570                    }
571                }
572                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
573                    self.new_length_table[i] = 0;
574                    i += 1;
575                }
576            } else {
577                // sym == 19: Insert zeros, count = 11 + 7bits
578                let count = 11 + reader.read_bits(7)? as usize;
579                #[cfg(test)]
580                {
581                    if precode_start_bit > 6060000 && precode_start_bit < 6065000 {
582                        eprintln!(" sym19: count={} filling i={}..{}", count, i, i + count);
583                    }
584                }
585                for _ in 0..count.min(HUFFMAN_TABLE_SIZE - i) {
586                    self.new_length_table[i] = 0;
587                    i += 1;
588                }
589            }
590            #[cfg(test)]
591            {
592                // Track when we reach low_dist region
593                let low_offset = MAINCODE_SIZE + OFFSETCODE_SIZE;
594                if precode_start_bit > 6060000
595                    && precode_start_bit < 6065000
596                    && i >= low_offset
597                    && i < low_offset + 5
598                {
599                    eprintln!(
600                        "  -> entering low_dist region at i={}, new_length_table[{}]={}",
601                        i, i, self.new_length_table[i]
602                    );
603                }
604            }
605        }
606
607        // Copy new table to old table for next table read (like unrar's memcpy)
608        self.old_length_table
609            .copy_from_slice(&self.new_length_table);
610
611        #[cfg(test)]
612        eprintln!();
613
614        #[cfg(test)]
615        eprintln!(
616            "new_length_table first 20: {:?}",
617            &self.new_length_table[..20]
618        );
619
620        #[cfg(test)]
621        if precode_start_bit > 100000 {
622            eprintln!(
623                "SECOND TABLE new_length_table[0..50]: {:?}",
624                &self.new_length_table[0..50]
625            );
626            // Show low_dist portion (offset 359..376)
627            let low_offset = MAINCODE_SIZE + OFFSETCODE_SIZE;
628            eprintln!(
629                "new_length_table low_dist portion [{}..{}]: {:?}",
630                low_offset,
631                low_offset + LOWOFFSETCODE_SIZE,
632                &self.new_length_table[low_offset..low_offset + LOWOFFSETCODE_SIZE]
633            );
634        }
635
636        // Build the four Huffman tables from new_length_table
637        // Use rebuild() if table exists to avoid allocation
638        let mut offset = 0;
639
640        let main_lengths = &self.new_length_table[offset..offset + MAINCODE_SIZE];
641        if let Some(ref mut table) = self.main_table {
642            table.rebuild(main_lengths)?;
643        } else {
644            self.main_table = Some(HuffmanTable::new(main_lengths)?);
645        }
646
647        #[cfg(test)]
648        {
649            // Debug: print codes for symbols we care about
650            let table = self.main_table.as_ref().unwrap();
651            for &sym in &[45u16, 71, 75, 89, 107, 185, 196, 256, 275] {
652                let len = main_lengths.get(sym as usize).copied().unwrap_or(0);
653                if len > 0 {
654                    // Find symbol position in sorted list
655                    let first_sym = table.first_symbol[len as usize];
656                    for i in first_sym..first_sym + table.length_counts[len as usize] {
657                        if table.symbols[i as usize] == sym {
658                            let code =
659                                table.first_code[len as usize] + (i as u32 - first_sym as u32);
660                            let bp = reader.bit_position();
661                            if bp > 6140000 && bp < 6290000 {
662                                eprintln!(
663                                    "main_table at {} symbol {}: len={}, code={:0width$b}",
664                                    bp,
665                                    sym,
666                                    len,
667                                    code,
668                                    width = len as usize
669                                );
670                            }
671                            break;
672                        }
673                    }
674                } else {
675                    let bp = reader.bit_position();
676                    if bp > 6140000 && bp < 6290000 {
677                        eprintln!("main_table at {} symbol {}: NOT IN TABLE", bp, sym);
678                    }
679                }
680            }
681        }
682
683        offset += MAINCODE_SIZE;
684
685        let dist_lengths = &self.new_length_table[offset..offset + OFFSETCODE_SIZE];
686        #[cfg(test)]
687        {
688            let bp = reader.bit_position();
689            if bp > 6140000 && bp < 6145000 {
690                eprintln!(
691                    "dist_table at bit_pos={} FULL lengths: {:?}",
692                    bp, dist_lengths
693                );
694            }
695        }
696        if let Some(ref mut table) = self.dist_table {
697            table.rebuild(dist_lengths)?;
698        } else {
699            self.dist_table = Some(HuffmanTable::new(dist_lengths)?);
700        }
701        offset += OFFSETCODE_SIZE;
702
703        #[cfg(test)]
704        {
705            let low_lengths = &self.new_length_table[offset..offset + LOWOFFSETCODE_SIZE];
706            eprintln!(
707                "low_dist_table lengths at bit_pos={}: {:?}",
708                reader.bit_position(),
709                low_lengths
710            );
711        }
712
713        let low_lengths = &self.new_length_table[offset..offset + LOWOFFSETCODE_SIZE];
714        if let Some(ref mut table) = self.low_dist_table {
715            table.rebuild(low_lengths)?;
716        } else {
717            self.low_dist_table = Some(HuffmanTable::new(low_lengths)?);
718        }
719
720        #[cfg(test)]
721        {
722            let low_lengths = &self.new_length_table[offset..offset + LOWOFFSETCODE_SIZE];
723            self.low_dist_table
724                .as_ref()
725                .unwrap()
726                .dump_codes("low_dist", low_lengths);
727            // Dump symbols array
728            eprintln!(
729                "low_dist symbols: {:?}",
730                self.low_dist_table.as_ref().unwrap().dump_symbols()
731            );
732            // Dump first_symbol
733            eprintln!(
734                "low_dist first_symbol[1..6]: {:?}",
735                self.low_dist_table.as_ref().unwrap().dump_first_symbol()
736            );
737            // Dump quick_table entries 512-575 (symbol 8's range)
738            eprintln!(
739                "low_dist quick_table[512..520]: {:?}",
740                (512..520)
741                    .map(|i| self.low_dist_table.as_ref().unwrap().quick_table_entry(i))
742                    .collect::<Vec<_>>()
743            );
744            eprintln!(
745                "low_dist quick_table[568]: {:?}",
746                self.low_dist_table.as_ref().unwrap().quick_table_entry(568)
747            );
748        }
749
750        offset += LOWOFFSETCODE_SIZE;
751
752        let len_lengths = &self.new_length_table[offset..offset + LENGTHCODE_SIZE];
753        #[cfg(test)]
754        {
755            eprintln!("len_table lengths[0..5]: {:?}", &len_lengths[0..5]);
756        }
757        if let Some(ref mut table) = self.len_table {
758            table.rebuild(len_lengths)?;
759        } else {
760            self.len_table = Some(HuffmanTable::new(len_lengths)?);
761        }
762
763        #[cfg(test)]
764        eprintln!("table reading done at bit_pos={}", reader.bit_position());
765
766        Ok(())
767    }
768}
769
770impl Default for HuffmanDecoder {
771    fn default() -> Self {
772        Self::new()
773    }
774}
775
776#[cfg(test)]
777mod tests {
778    use super::*;
779
780    #[test]
781    fn test_huffman_table_simple() {
782        // Simple table: 2 symbols with lengths [1, 1]
783        // Symbol 0 = code 0, Symbol 1 = code 1
784        let lengths = [1u8, 1];
785        let table = HuffmanTable::new(&lengths).unwrap();
786
787        let data = [0b10000000]; // First bit is 1 -> symbol 1
788        let mut reader = BitReader::new(&data);
789        assert_eq!(table.decode(&mut reader).unwrap(), 1);
790    }
791
792    #[test]
793    fn test_huffman_table_varying_lengths() {
794        // Symbol 0: length 1, code 0
795        // Symbol 1: length 2, code 10
796        // Symbol 2: length 2, code 11
797        let lengths = [1u8, 2, 2];
798        let table = HuffmanTable::new(&lengths).unwrap();
799
800        let data = [0b01011000]; // 0 (sym 0), 10 (sym 1), 11 (sym 2)
801        let mut reader = BitReader::new(&data);
802
803        assert_eq!(table.decode(&mut reader).unwrap(), 0);
804        assert_eq!(table.decode(&mut reader).unwrap(), 1);
805        assert_eq!(table.decode(&mut reader).unwrap(), 2);
806    }
807
808    #[test]
809    fn test_huffman_table_low_dist_all_4bit() {
810        // All symbols 0-15 have length 4, symbol 16 has length 0
811        // This is the typical low_dist table pattern
812        let lengths: Vec<u8> = vec![4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0];
813        let table = HuffmanTable::new(&lengths).unwrap();
814
815        // For all-4-bit codes:
816        // Symbol 0: code 0000
817        // Symbol 1: code 0001
818        // ...
819        // Symbol 8: code 1000
820        // ...
821        // Symbol 15: code 1111
822
823        // Check quick_table for index 568 (which is 1000111000 in 10 bits)
824        // First 4 bits = 1000 = 8, so symbol should be 8
825        let (sym, len) = table.quick_table_entry(568);
826        eprintln!("quick_table[568] = (sym={}, len={})", sym, len);
827        assert_eq!(sym, 8, "Symbol at quick_table[568] should be 8");
828        assert_eq!(len, 4, "Length at quick_table[568] should be 4");
829
830        // Check all entries from 512-575 (where first 4 bits = 1000)
831        for i in 512..576 {
832            let (s, l) = table.quick_table_entry(i);
833            assert_eq!(s, 8, "Symbol at quick_table[{}] should be 8, got {}", i, s);
834            assert_eq!(l, 4, "Length at quick_table[{}] should be 4, got {}", i, l);
835        }
836
837        // Test actual decoding
838        // Bits 1000... should decode to symbol 8
839        let data = [0b10000000, 0b00000000];
840        let mut reader = BitReader::new(&data);
841        assert_eq!(table.decode(&mut reader).unwrap(), 8);
842        assert_eq!(reader.bit_position(), 4); // Should consume exactly 4 bits
843    }
844
845    #[test]
846    fn test_huffman_table_rebuild_changes() {
847        // Start with a table that has different lengths
848        // This tests that rebuild() properly clears and rebuilds the quick_table
849        let initial_lengths: Vec<u8> = vec![4, 4, 5, 4, 3, 5, 4, 4, 4, 4, 4, 5, 4, 3, 5, 4, 0];
850        let mut table = HuffmanTable::new(&initial_lengths).unwrap();
851
852        // Now rebuild with all 4-bit codes
853        let new_lengths: Vec<u8> = vec![4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0];
854        table.rebuild(&new_lengths).unwrap();
855
856        // Check quick_table for index 568
857        let (sym, len) = table.quick_table_entry(568);
858        eprintln!(
859            "After rebuild: quick_table[568] = (sym={}, len={})",
860            sym, len
861        );
862        assert_eq!(
863            sym, 8,
864            "Symbol at quick_table[568] should be 8 after rebuild"
865        );
866        assert_eq!(
867            len, 4,
868            "Length at quick_table[568] should be 4 after rebuild"
869        );
870
871        // Check all entries from 512-575 (where first 4 bits = 1000)
872        for i in 512..576 {
873            let (s, l) = table.quick_table_entry(i);
874            assert_eq!(
875                s, 8,
876                "Symbol at quick_table[{}] should be 8 after rebuild, got {}",
877                i, s
878            );
879            assert_eq!(
880                l, 4,
881                "Length at quick_table[{}] should be 4 after rebuild, got {}",
882                i, l
883            );
884        }
885
886        // Test actual decoding
887        let data = [0b10000000, 0b00000000];
888        let mut reader = BitReader::new(&data);
889        assert_eq!(table.decode(&mut reader).unwrap(), 8);
890        assert_eq!(reader.bit_position(), 4);
891    }
892}