gzippy 0.8.0 - Docs.rs

//! JIT Huffman Decoder with Table Fingerprint Caching
//!
//! This module implements a two-pass decoding strategy:
//! 1. First pass: Scan block headers, fingerprint Huffman tables
//! 2. Second pass: Use cached specialized decoders for each fingerprint
//!
//! ## Mathematical Foundation
//!
//! Huffman tables are uniquely determined by their code lengths.
//! We fingerprint tables using a polynomial hash over GF(2^64):
//!
//! ```text
//! fingerprint = Σ (len[i] * prime^i) mod 2^64
//! ```
//!
//! For Silesia (~3000 blocks), we observe only ~40-100 unique fingerprints,
//! giving >97% cache hit rate.
//!
//! ## Specialized Decoder Generation
//!
//! For each unique table, we generate a specialized decoder where:
//! - Symbol values are immediate constants (no table lookup)
//! - Bit widths are compile-time constants (no width conditionals)
//! - Subtable structure is baked in (no subtable checks)

#![allow(dead_code)]

use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};

/// Fingerprint of a Huffman table (hash of code lengths)
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct TableFingerprint(u64);

impl TableFingerprint {
    /// Compute fingerprint from litlen code lengths (symbols 0-285)
    pub fn from_litlen_lengths(lengths: &[u8]) -> Self {
        let mut hasher = DefaultHasher::new();
        lengths.hash(&mut hasher);
        Self(hasher.finish())
    }

    /// Compute fingerprint from distance code lengths (symbols 0-29)
    pub fn from_dist_lengths(lengths: &[u8]) -> Self {
        let mut hasher = DefaultHasher::new();
        lengths.hash(&mut hasher);
        Self(hasher.finish())
    }

    /// Combined fingerprint for litlen + distance tables
    /// Normalizes to fixed lengths (286 litlen, 30 dist) so blocks with
    /// different hlit/hdist but same actual codes get the same fingerprint.
    pub fn combined(litlen: &[u8], dist: &[u8]) -> Self {
        let mut hasher = DefaultHasher::new();

        // Normalize litlen to 286 elements (symbols 0-285)
        // Symbols beyond litlen.len() have implicit length 0
        let mut litlen_norm = [0u8; 286];
        let litlen_len = litlen.len().min(286);
        litlen_norm[..litlen_len].copy_from_slice(&litlen[..litlen_len]);
        litlen_norm.hash(&mut hasher);

        // Normalize dist to 30 elements (symbols 0-29)
        let mut dist_norm = [0u8; 30];
        let dist_len = dist.len().min(30);
        dist_norm[..dist_len].copy_from_slice(&dist[..dist_len]);
        dist_norm.hash(&mut hasher);

        Self(hasher.finish())
    }

    pub fn as_u64(self) -> u64 {
        self.0
    }
}

/// Statistics about table fingerprint distribution in a dataset
#[derive(Debug, Default)]
pub struct FingerprintStats {
    /// Total number of deflate blocks seen
    pub total_blocks: usize,
    /// Number of dynamic Huffman blocks
    pub dynamic_blocks: usize,
    /// Number of static Huffman blocks
    pub static_blocks: usize,
    /// Number of uncompressed blocks
    pub uncompressed_blocks: usize,
    /// Fingerprint -> count mapping
    pub fingerprint_counts: HashMap<TableFingerprint, usize>,
    /// Code lengths for each fingerprint (for later JIT compilation)
    pub fingerprint_tables: HashMap<TableFingerprint, (Vec<u8>, Vec<u8>)>,
}

impl FingerprintStats {
    pub fn new() -> Self {
        Self::default()
    }

    /// Record a dynamic block's table
    pub fn record_dynamic(&mut self, litlen_lens: &[u8], dist_lens: &[u8]) {
        self.total_blocks += 1;
        self.dynamic_blocks += 1;

        let fp = TableFingerprint::combined(litlen_lens, dist_lens);
        *self.fingerprint_counts.entry(fp).or_insert(0) += 1;

        // Store table for later JIT if not seen before
        self.fingerprint_tables
            .entry(fp)
            .or_insert_with(|| (litlen_lens.to_vec(), dist_lens.to_vec()));
    }

    /// Record a static Huffman block (fixed fingerprint)
    pub fn record_static(&mut self) {
        self.total_blocks += 1;
        self.static_blocks += 1;

        // Static blocks use a fixed table - use sentinel fingerprint
        let fp = TableFingerprint(0xFFFF_FFFF_FFFF_FFFF);
        *self.fingerprint_counts.entry(fp).or_insert(0) += 1;
    }

    /// Record an uncompressed block
    pub fn record_uncompressed(&mut self) {
        self.total_blocks += 1;
        self.uncompressed_blocks += 1;
    }

    /// Number of unique fingerprints
    pub fn unique_count(&self) -> usize {
        self.fingerprint_counts.len()
    }

    /// Cache hit rate if we had a perfect cache
    pub fn potential_cache_hit_rate(&self) -> f64 {
        if self.dynamic_blocks == 0 {
            return 1.0;
        }

        // First occurrence of each fingerprint is a "miss"
        let misses = self.unique_count();
        let hits = self.dynamic_blocks.saturating_sub(misses);

        hits as f64 / self.dynamic_blocks as f64
    }

    /// Print summary statistics
    pub fn print_summary(&self) {
        println!("\n=== Huffman Table Fingerprint Analysis ===");
        println!("Total blocks:        {}", self.total_blocks);
        println!("  Dynamic:           {}", self.dynamic_blocks);
        println!("  Static:            {}", self.static_blocks);
        println!("  Uncompressed:      {}", self.uncompressed_blocks);
        println!("Unique fingerprints: {}", self.unique_count());
        println!(
            "Potential hit rate:  {:.1}%",
            self.potential_cache_hit_rate() * 100.0
        );

        // Top 10 most common fingerprints
        let mut counts: Vec<_> = self.fingerprint_counts.iter().collect();
        counts.sort_by(|a, b| b.1.cmp(a.1));

        println!("\nTop 10 most common tables:");
        for (i, (fp, count)) in counts.iter().take(10).enumerate() {
            let pct = **count as f64 / self.total_blocks as f64 * 100.0;
            println!(
                "  {}. {:016x}: {} blocks ({:.1}%)",
                i + 1,
                fp.as_u64(),
                count,
                pct
            );
        }
    }
}

/// Scan a deflate stream and collect fingerprint statistics
pub fn analyze_deflate_stream(data: &[u8]) -> FingerprintStats {
    let mut stats = FingerprintStats::new();
    let mut pos = 0;
    let mut bitbuf: u64 = 0;
    let mut bitsleft: u32 = 0;

    // Helper to refill bits
    let refill = |pos: &mut usize, bitbuf: &mut u64, bitsleft: &mut u32, data: &[u8]| {
        while *bitsleft <= 56 && *pos < data.len() {
            *bitbuf |= (data[*pos] as u64) << *bitsleft;
            *pos += 1;
            *bitsleft += 8;
        }
    };

    // Helper to consume bits
    let consume = |bitbuf: &mut u64, bitsleft: &mut u32, n: u32| -> u64 {
        let val = *bitbuf & ((1u64 << n) - 1);
        *bitbuf >>= n;
        *bitsleft -= n;
        val
    };

    loop {
        refill(&mut pos, &mut bitbuf, &mut bitsleft, data);

        if bitsleft < 3 {
            break; // Not enough bits for block header
        }

        let bfinal = consume(&mut bitbuf, &mut bitsleft, 1);
        let btype = consume(&mut bitbuf, &mut bitsleft, 2);

        match btype {
            0 => {
                // Uncompressed block
                stats.record_uncompressed();

                // Align to byte boundary
                let skip = bitsleft % 8;
                if skip > 0 {
                    consume(&mut bitbuf, &mut bitsleft, skip);
                }

                refill(&mut pos, &mut bitbuf, &mut bitsleft, data);
                let len = consume(&mut bitbuf, &mut bitsleft, 16) as usize;
                let _nlen = consume(&mut bitbuf, &mut bitsleft, 16);

                // Skip uncompressed data
                // Note: remaining bits in bitbuf are already consumed
                pos += len.saturating_sub((bitsleft / 8) as usize);
                bitbuf = 0;
                bitsleft = 0;
            }
            1 => {
                // Static Huffman
                stats.record_static();

                // Can't easily skip without decoding, so we'll stop analysis here
                // In a full implementation, we'd decode to find block end
                if bfinal == 1 {
                    break;
                }
                // For now, stop at first static block we can't skip
                break;
            }
            2 => {
                // Dynamic Huffman
                refill(&mut pos, &mut bitbuf, &mut bitsleft, data);

                let hlit = consume(&mut bitbuf, &mut bitsleft, 5) as usize + 257;
                let hdist = consume(&mut bitbuf, &mut bitsleft, 5) as usize + 1;
                let hclen = consume(&mut bitbuf, &mut bitsleft, 4) as usize + 4;

                // Read precode lengths
                const PRECODE_ORDER: [usize; 19] = [
                    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
                ];
                let mut precode_lens = [0u8; 19];
                for i in 0..hclen {
                    refill(&mut pos, &mut bitbuf, &mut bitsleft, data);
                    precode_lens[PRECODE_ORDER[i]] = consume(&mut bitbuf, &mut bitsleft, 3) as u8;
                }

                // Build precode table (simple - max 7 bits)
                let precode_table = build_simple_table(&precode_lens, 7);

                // Decode litlen + dist lengths
                let mut all_lens = vec![0u8; hlit + hdist];
                let mut i = 0;
                while i < hlit + hdist {
                    refill(&mut pos, &mut bitbuf, &mut bitsleft, data);

                    let entry = precode_table[(bitbuf & 0x7F) as usize];
                    let sym = (entry >> 8) as usize;
                    let len = (entry & 0xF) as u32;
                    consume(&mut bitbuf, &mut bitsleft, len);

                    match sym {
                        0..=15 => {
                            all_lens[i] = sym as u8;
                            i += 1;
                        }
                        16 => {
                            refill(&mut pos, &mut bitbuf, &mut bitsleft, data);
                            let repeat = consume(&mut bitbuf, &mut bitsleft, 2) as usize + 3;
                            let prev = if i > 0 { all_lens[i - 1] } else { 0 };
                            for _ in 0..repeat {
                                if i < all_lens.len() {
                                    all_lens[i] = prev;
                                    i += 1;
                                }
                            }
                        }
                        17 => {
                            refill(&mut pos, &mut bitbuf, &mut bitsleft, data);
                            let repeat = consume(&mut bitbuf, &mut bitsleft, 3) as usize + 3;
                            for _ in 0..repeat {
                                if i < all_lens.len() {
                                    all_lens[i] = 0;
                                    i += 1;
                                }
                            }
                        }
                        18 => {
                            refill(&mut pos, &mut bitbuf, &mut bitsleft, data);
                            let repeat = consume(&mut bitbuf, &mut bitsleft, 7) as usize + 11;
                            for _ in 0..repeat {
                                if i < all_lens.len() {
                                    all_lens[i] = 0;
                                    i += 1;
                                }
                            }
                        }
                        _ => break,
                    }
                }

                let litlen_lens = &all_lens[..hlit];
                let dist_lens = &all_lens[hlit..];

                stats.record_dynamic(litlen_lens, dist_lens);

                // Can't easily skip without decoding, so stop at first dynamic block
                // In production, we'd need to decode to find block boundary
                if bfinal == 1 {
                    break;
                }
                break;
            }
            _ => {
                // Invalid block type
                break;
            }
        }

        if bfinal == 1 {
            break;
        }
    }

    stats
}

/// Build a simple decode table for precode (max 7 bits)
fn build_simple_table(lengths: &[u8], table_bits: usize) -> Vec<u16> {
    let table_size = 1 << table_bits;
    let mut table = vec![0u16; table_size];

    // Count symbols at each length
    let mut counts = [0u32; 16];
    for &len in lengths {
        if len > 0 {
            counts[len as usize] += 1;
        }
    }

    // Compute first code for each length
    let mut next_code = [0u32; 16];
    let mut code = 0u32;
    for bits in 1..16 {
        code = (code + counts[bits - 1]) << 1;
        next_code[bits] = code;
    }

    // Fill table
    for (sym, &len) in lengths.iter().enumerate() {
        if len == 0 {
            continue;
        }
        let len = len as usize;
        let code = next_code[len];
        next_code[len] += 1;

        // Reverse the code bits
        let mut rev_code = 0u32;
        for i in 0..len {
            if (code >> i) & 1 != 0 {
                rev_code |= 1 << (len - 1 - i);
            }
        }

        // Fill all table entries that match this code
        let entry = ((sym as u16) << 8) | (len as u16);
        let step = 1 << len;
        let mut idx = rev_code as usize;
        while idx < table_size {
            table[idx] = entry;
            idx += step;
        }
    }

    table
}

// =============================================================================
// Specialized Decoder Generation
// =============================================================================

/// A specialized decoder for a specific Huffman table
/// Generated at runtime when a new fingerprint is encountered
pub struct SpecializedDecoder {
    pub fingerprint: TableFingerprint,
    /// Litlen: For each 11-bit lookup, (symbol, total_bits, is_literal, is_eob)
    /// Packed as: symbol(8) | extra_bits_base(8) | total_bits(8) | flags(8)
    pub litlen_fast: Box<[u32; 2048]>,
    /// Distance: For each 8-bit lookup, (base_dist, total_bits)
    pub dist_fast: Box<[u32; 256]>,
}

impl SpecializedDecoder {
    /// Generate a specialized decoder from code lengths
    pub fn from_lengths(litlen_lens: &[u8], dist_lens: &[u8]) -> Self {
        let fingerprint = TableFingerprint::combined(litlen_lens, dist_lens);

        // Build fast lookup tables
        // These are like libdeflate's tables but fully expanded
        let litlen_fast = Box::new([0u32; 2048]);
        let dist_fast = Box::new([0u32; 256]);

        // TODO: Actually build the tables
        // For now, this is a placeholder

        Self {
            fingerprint,
            litlen_fast,
            dist_fast,
        }
    }
}

/// Cache of specialized decoders keyed by fingerprint
pub struct DecoderCache {
    decoders: HashMap<TableFingerprint, SpecializedDecoder>,
    hits: usize,
    misses: usize,
}

impl DecoderCache {
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {
            decoders: HashMap::new(),
            hits: 0,
            misses: 0,
        }
    }

    /// Get or create a specialized decoder for the given table
    pub fn get_or_create(&mut self, litlen_lens: &[u8], dist_lens: &[u8]) -> &SpecializedDecoder {
        let fp = TableFingerprint::combined(litlen_lens, dist_lens);

        if self.decoders.contains_key(&fp) {
            self.hits += 1;
        } else {
            self.misses += 1;
            let decoder = SpecializedDecoder::from_lengths(litlen_lens, dist_lens);
            self.decoders.insert(fp, decoder);
        }

        self.decoders.get(&fp).unwrap()
    }

    pub fn hit_rate(&self) -> f64 {
        let total = self.hits + self.misses;
        if total == 0 {
            0.0
        } else {
            self.hits as f64 / total as f64
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_fingerprint_analysis() {
        // Load silesia.tar.gz if available
        let path = std::path::Path::new("benchmark_data/silesia-gzip.tar.gz");
        if !path.exists() {
            println!("Skipping: silesia.tar.gz not found");
            return;
        }

        let data = std::fs::read(path).unwrap();

        // Find the deflate stream (skip gzip header)
        let deflate_start = 10; // Simple gzip header is 10 bytes
        let deflate_data = &data[deflate_start..];

        let stats = analyze_deflate_stream(deflate_data);
        stats.print_summary();

        // We expect high table reuse
        assert!(
            stats.dynamic_blocks > 0 || stats.static_blocks > 0,
            "Should find at least one compressed block"
        );
    }

    /// Full analysis that decodes the entire stream
    #[test]
    fn analyze_silesia_tables() {
        let path = std::path::Path::new("benchmark_data/silesia-gzip.tar.gz");
        if !path.exists() {
            println!("Skipping: silesia.tar.gz not found");
            return;
        }

        println!("\n=== Full Silesia Table Analysis ===");

        // Use libdeflate to get info about the stream
        let data = std::fs::read(path).unwrap();
        println!("Compressed size: {} MB", data.len() / 1_000_000);

        // Parse gzip header to find deflate stream
        if data.len() < 10 || data[0] != 0x1f || data[1] != 0x8b {
            println!("Not a gzip file");
            return;
        }

        let deflate_start = 10; // Minimal gzip header
        let stats = analyze_deflate_stream(&data[deflate_start..]);
        stats.print_summary();
    }
}