zesven 1.1.0 - Docs.rs

//! Fast LZMA2 encoder with radix match-finder.
//!
//! This module provides a fast LZMA2 encoder that uses a hash-chain based
//! match-finder for improved compression speed (1.5-3x faster than standard
//! LZMA2 at higher compression levels).
//!
//! # Algorithm Overview
//!
//! The Fast LZMA2 algorithm improves upon standard LZMA2 by using:
//!
//! - **Hash-Chain Match-Finder**: O(n) complexity for table building
//! - **Better Cache Efficiency**: Sequential memory access patterns
//! - **Parallel-Friendly**: Simple threading model with low memory overhead
//!
//! # Implementation Status
//!
//! The radix match-finder is implemented in `radix_mf.rs`. The encoder
//! uses a custom encoding pipeline with:
//!
//! - Range encoder (`lzma_rc.rs`)
//! - Probability context (`lzma_context.rs`)
//! - LZMA2 block format (`fast_lzma2_encode.rs`)
//! - Greedy token selection with uncompressed fallback
//!
//! **Note**: Each chunk uses dictionary reset (AllReset mode) for correctness.
//! Cross-chunk dictionary references are not yet supported.
//!
//! # Performance Characteristics
//!
//! | Aspect | Standard LZMA2 | Fast LZMA2 |
//! |--------|---------------|------------|
//! | Match Finding | Hash chains | Hash chains (optimized) |
//! | Table Build | O(n × depth) | O(n) |
//! | Speed at Level 5+ | Baseline | 1.5-3x faster (planned) |
//! | Compression Ratio | Baseline | Comparable |
//!
//! # Example
//!
//! ```rust,ignore
//! use zesven::codec::fast_lzma2::{FastLzma2Encoder, FastLzma2Options, Strategy};
//!
//! let mut encoder = FastLzma2Encoder::new(output, &FastLzma2Options {
//!     level: 6,
//!     strategy: Strategy::Balanced,
//!     dict_size: 8 * 1024 * 1024, // 8MB dictionary
//!     ..Default::default()
//! });
//!
//! encoder.write_all(data)?;
//! encoder.try_finish()?;
//! ```
//!
//! # Reference Implementation
//!
//! Based on the Fast LZMA2 library by Conor McCarthy:
//! <https://github.com/conor42/fast-lzma2>
//!
//! Key reference files:
//! - `radix_mf.c` - Radix match-finder core
//! - `radix_engine.h` - Radix sort algorithm
//! - `lzma2_enc.c` - LZMA2 block encoding

use std::io::{self, Write};

use super::fast_lzma2_encode::{
    self, ChunkResetMode, DEFAULT_BLOCK_SIZE, Token, encode_greedy, write_compressed_chunk,
};
use super::lzma_context::LzmaEncoderState;
use super::lzma_rc::LzmaRangeEncoder;
use super::{Encoder, method};

// Re-export the radix match-finder for direct use
pub use super::radix_mf::{Match, RadixMatchFinder};

/// Compression strategy for Fast LZMA2.
///
/// Different strategies trade off compression speed vs ratio.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Strategy {
    /// Fastest compression, slightly larger output.
    Fast,
    /// Balanced speed and compression ratio (default).
    #[default]
    Balanced,
    /// Best compression ratio, slower.
    Best,
}

/// Options for Fast LZMA2 encoder.
#[derive(Debug, Clone)]
pub struct FastLzma2Options {
    /// Compression level (1-10).
    ///
    /// - Levels 1-3: Fast mode, using Fast strategy internally
    /// - Levels 4-6: Balanced mode
    /// - Levels 7-10: Best mode, maximum compression
    pub level: u32,

    /// Dictionary size in bytes.
    ///
    /// Larger dictionaries improve compression but use more memory.
    /// Default: 8MB
    pub dict_size: u32,

    /// Compression strategy.
    ///
    /// Overrides the level-based default if set explicitly.
    pub strategy: Strategy,

    /// Number of threads for parallel compression.
    ///
    /// - `None`: Use all available cores
    /// - `Some(1)`: Single-threaded
    /// - `Some(n)`: Use n threads
    pub threads: Option<usize>,

    /// Search depth for match finding.
    ///
    /// Higher values find better matches but are slower.
    /// Default: level-dependent (16-256)
    pub depth: Option<u32>,
}

impl Default for FastLzma2Options {
    fn default() -> Self {
        Self {
            level: 6,
            dict_size: 8 * 1024 * 1024, // 8MB
            strategy: Strategy::Balanced,
            threads: None,
            depth: None,
        }
    }
}

impl FastLzma2Options {
    /// Creates options for a specific compression level.
    pub fn with_level(level: u32) -> Self {
        let level = level.clamp(1, 10);
        let strategy = match level {
            1..=3 => Strategy::Fast,
            4..=6 => Strategy::Balanced,
            _ => Strategy::Best,
        };

        // Dictionary sizes based on level (matching 7-zip defaults)
        let dict_size = match level {
            1 => 64 * 1024,        // 64KB
            2 => 256 * 1024,       // 256KB
            3 => 1024 * 1024,      // 1MB
            4 => 2 * 1024 * 1024,  // 2MB
            5 => 4 * 1024 * 1024,  // 4MB
            6 => 8 * 1024 * 1024,  // 8MB
            7 => 16 * 1024 * 1024, // 16MB
            8 => 32 * 1024 * 1024, // 32MB
            9 => 64 * 1024 * 1024, // 64MB
            _ => 64 * 1024 * 1024, // 64MB for level 10
        };

        Self {
            level,
            dict_size,
            strategy,
            threads: None,
            depth: None,
        }
    }

    /// Sets the dictionary size.
    pub fn dict_size(mut self, size: u32) -> Self {
        self.dict_size = size;
        self
    }

    /// Sets the compression strategy.
    pub fn strategy(mut self, strategy: Strategy) -> Self {
        self.strategy = strategy;
        self
    }

    /// Sets the number of threads.
    pub fn threads(mut self, threads: usize) -> Self {
        self.threads = Some(threads);
        self
    }

    /// Returns the LZMA2 properties byte for this configuration.
    pub fn properties(&self) -> u8 {
        // LZMA2 dict size encoding:
        // prop = 2 * floor(log2(dict_size)) + (dict_size >> (floor(log2(dict_size)) - 1)) - 2
        // Simplified: just find the right encoded value
        let dict = self.dict_size.max(4096); // Minimum 4KB
        let log2 = (32 - dict.leading_zeros()) - 1;

        // Check if dict_size is exactly a power of 2 or 3/2 * power of 2
        let power = 1u32 << log2;
        if dict == power {
            // Exact power of 2
            ((log2 - 12) * 2) as u8
        } else {
            // 3/2 * power of 2 (or close to it)
            ((log2 - 12) * 2 + 1) as u8
        }
    }
}

/// Fast LZMA2 encoder.
///
/// Uses a radix match-finder for improved compression speed.
///
/// # Implementation Status
///
/// - **Phase 1** (complete): LZMA2 uncompressed blocks for format validation.
///
/// - **Phase 2** (complete): Range encoding for compressed LZMA blocks.
///   Falls back to uncompressed output when compression doesn't help.
///
/// - **Phase 3** (future): Dictionary continuity across chunks for better
///   compression ratio on large files. Currently each chunk resets the
///   dictionary (AllReset mode).
pub struct FastLzma2Encoder<W: Write> {
    /// Output destination for LZMA2 stream.
    output: W,
    /// Encoder options.
    options: FastLzma2Options,
    /// Match finder for detecting repeated sequences.
    match_finder: RadixMatchFinder,
    /// LZMA encoder state with probability arrays.
    encoder_state: LzmaEncoderState,
    /// Buffer for accumulating input data before compression.
    buffer: Vec<u8>,
    /// Whether the first chunk has been written (for dictionary reset flag).
    first_chunk: bool,
    /// Whether encoding is complete.
    finished: bool,
}

impl<W: Write> std::fmt::Debug for FastLzma2Encoder<W> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("FastLzma2Encoder")
            .field("options", &self.options)
            .field("buffer_len", &self.buffer.len())
            .field("first_chunk", &self.first_chunk)
            .field("finished", &self.finished)
            .finish_non_exhaustive()
    }
}

impl<W: Write + Send> FastLzma2Encoder<W> {
    /// Creates a new Fast LZMA2 encoder.
    ///
    /// # Arguments
    ///
    /// * `output` - The destination for compressed data
    /// * `options` - Encoder options
    pub fn new(output: W, options: &FastLzma2Options) -> Self {
        // Determine search depth based on strategy
        let depth = options.depth.unwrap_or(match options.strategy {
            Strategy::Fast => 16,
            Strategy::Balanced => 64,
            Strategy::Best => 256,
        });

        let match_finder = RadixMatchFinder::new(options.dict_size as usize, depth);

        // Default LZMA parameters: lc=3, lp=0, pb=2
        let encoder_state = LzmaEncoderState::new(3, 0, 2);

        Self {
            output,
            options: options.clone(),
            match_finder,
            encoder_state,
            buffer: Vec::with_capacity(DEFAULT_BLOCK_SIZE),
            first_chunk: true,
            finished: false,
        }
    }

    /// Returns the LZMA2 properties for this encoder (1 byte).
    pub fn properties(options: &FastLzma2Options) -> Vec<u8> {
        vec![options.properties()]
    }

    /// Maximum uncompressed size for a single LZMA2 chunk.
    const MAX_CHUNK_SIZE: usize = 64 * 1024; // 64KB

    /// Compresses and writes a block of data.
    ///
    /// Uses the radix match-finder to find matches, encodes tokens via
    /// range coding, and writes LZMA2 compressed chunks.
    fn compress_block(&mut self, data: &[u8]) -> io::Result<()> {
        if data.is_empty() {
            return Ok(());
        }

        // Split large data into chunks of up to 64KB
        let mut pos = 0;
        while pos < data.len() {
            let chunk_end = (pos + Self::MAX_CHUNK_SIZE).min(data.len());
            let chunk = &data[pos..chunk_end];

            self.compress_chunk(chunk)?;
            pos = chunk_end;
        }

        Ok(())
    }

    /// Compresses and writes a single chunk of data (up to 64KB).
    fn compress_chunk(&mut self, data: &[u8]) -> io::Result<()> {
        if data.is_empty() {
            return Ok(());
        }

        // Step 1: Build match table
        self.match_finder.build(data);

        // Step 2: Greedy encode to tokens
        let tokens = encode_greedy(data, &self.match_finder);

        // Step 3: Range-encode tokens to LZMA stream
        let compressed = self.encode_tokens(&tokens, data);

        // Step 4: Decide whether to use compressed or uncompressed output
        // Use uncompressed if compression doesn't help (compressed >= uncompressed)
        if compressed.len() >= data.len() {
            // Fall back to uncompressed chunk
            fast_lzma2_encode::write_uncompressed_data(&mut self.output, data, self.first_chunk)?;
        } else {
            // Write compressed chunk
            // Performance note: Currently using AllReset for all chunks.
            // For better compression ratio, non-first chunks should use StateReset,
            // which requires:
            // 1. Preserving the dictionary window (last dict_size bytes) between chunks
            // 2. Modifying RadixMatchFinder to support lookback into previous chunk data
            // 3. Not resetting encoder probabilities between chunks
            // This is a significant optimization opportunity (~5-15% better compression)
            // but requires substantial implementation work.
            let reset_mode = ChunkResetMode::AllReset;

            let props = Some(0x5D); // lc=3, lp=0, pb=2 - always include for AllReset

            write_compressed_chunk(&mut self.output, &compressed, data.len(), reset_mode, props)?;
        }

        // Reset encoder state for next chunk
        self.encoder_state.reset();
        self.match_finder.reset();
        self.first_chunk = false;
        Ok(())
    }

    /// Encodes tokens to an LZMA bitstream using range coding.
    fn encode_tokens(&mut self, tokens: &[Token], data: &[u8]) -> Vec<u8> {
        let mut rc = LzmaRangeEncoder::new();

        let mut pos = 0usize;
        for token in tokens {
            match token {
                Token::Literal(byte) => {
                    let prev_byte = if pos > 0 { data[pos - 1] } else { 0 };

                    // Get match byte if we're in "after match" state (state >= 7)
                    let match_byte = if self.encoder_state.state() >= 7 && pos > 0 {
                        let reps = self.encoder_state.reps();
                        let dist = reps[0] + 1;
                        if pos >= dist as usize {
                            Some(data[pos - dist as usize])
                        } else {
                            None
                        }
                    } else {
                        None
                    };

                    self.encoder_state
                        .encode_literal(&mut rc, *byte, pos, prev_byte, match_byte);
                    pos += 1;
                }
                Token::Match { distance, length } => {
                    self.encoder_state
                        .encode_match(&mut rc, *distance, *length, pos);
                    pos += *length as usize;
                }
            }
        }

        rc.finish()
    }

    /// Finishes encoding and flushes all data.
    ///
    /// Writes any remaining buffered data and the LZMA2 end marker.
    /// Returns the underlying writer.
    pub fn try_finish(mut self) -> io::Result<W> {
        if self.finished {
            return Ok(self.output);
        }

        // Flush remaining buffer
        if !self.buffer.is_empty() {
            let data = std::mem::take(&mut self.buffer);
            self.compress_block(&data)?;
        }

        // Write LZMA2 end marker
        fast_lzma2_encode::write_end_marker(&mut self.output)?;

        self.finished = true;
        Ok(self.output)
    }

    /// Finishes encoding without returning the writer.
    pub fn finish(self) -> io::Result<()> {
        self.try_finish()?;
        Ok(())
    }
}

impl<W: Write + Send> Write for FastLzma2Encoder<W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        if self.finished {
            return Err(io::Error::other("encoder already finished"));
        }

        self.buffer.extend_from_slice(buf);

        // Process when buffer exceeds block size
        while self.buffer.len() >= DEFAULT_BLOCK_SIZE {
            let block: Vec<u8> = self.buffer.drain(..DEFAULT_BLOCK_SIZE).collect();
            self.compress_block(&block)?;
        }

        Ok(buf.len())
    }

    fn flush(&mut self) -> io::Result<()> {
        // Flush any pending data
        if !self.buffer.is_empty() {
            let data = std::mem::take(&mut self.buffer);
            self.compress_block(&data)?;
        }
        self.output.flush()
    }
}

impl<W: Write + Send> Encoder for FastLzma2Encoder<W> {
    fn method_id(&self) -> &'static [u8] {
        // Uses same method ID as standard LZMA2 (format compatible)
        method::LZMA2
    }

    fn finish(self: Box<Self>) -> io::Result<()> {
        (*self).finish()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_options_default() {
        let opts = FastLzma2Options::default();
        assert_eq!(opts.level, 6);
        assert_eq!(opts.dict_size, 8 * 1024 * 1024);
        assert_eq!(opts.strategy, Strategy::Balanced);
    }

    #[test]
    fn test_options_with_level() {
        let opts = FastLzma2Options::with_level(1);
        assert_eq!(opts.level, 1);
        assert_eq!(opts.strategy, Strategy::Fast);

        let opts = FastLzma2Options::with_level(5);
        assert_eq!(opts.level, 5);
        assert_eq!(opts.strategy, Strategy::Balanced);

        let opts = FastLzma2Options::with_level(9);
        assert_eq!(opts.level, 9);
        assert_eq!(opts.strategy, Strategy::Best);
    }

    #[test]
    fn test_properties_encoding() {
        // 8MB dictionary (2^23) should encode to prop 22
        let opts = FastLzma2Options::default();
        let prop = opts.properties();
        // (23 - 12) * 2 = 22
        assert_eq!(prop, 22);

        // 4KB dictionary (2^12) should encode to prop 0
        let opts = FastLzma2Options::with_level(1).dict_size(4096);
        let prop = opts.properties();
        // (12 - 12) * 2 = 0
        assert_eq!(prop, 0);
    }

    #[test]
    fn test_fast_lzma2_encoder_roundtrip() {
        // Test that the custom encoder pipeline works (Phase 1: uncompressed LZMA2)
        let data = b"Hello, Fast LZMA2 World! This is a test of compression.";
        let mut compressed = Vec::new();

        {
            let mut encoder =
                FastLzma2Encoder::new(&mut compressed, &FastLzma2Options::with_level(3));
            encoder.write_all(data).unwrap();
            let _ = encoder.try_finish().unwrap();
        }

        // Output should have LZMA2 format (uncompressed chunks + end marker)
        assert!(!compressed.is_empty());

        // Decompress and verify
        let mut decompressed = Vec::new();
        let mut decoder = lzma_rust2::Lzma2Reader::new(
            std::io::Cursor::new(&compressed),
            1024 * 1024, // 1MB dict
            None,
        );
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();

        assert_eq!(&decompressed[..], &data[..]);
    }

    #[test]
    fn test_fast_lzma2_encoder_incompressible() {
        // Test with random-like data that won't compress
        // Use a simple PRNG to generate pseudo-random data
        let mut state = 12345u32;
        let data: Vec<u8> = (0..60_000u32)
            .map(|_| {
                state = state.wrapping_mul(1103515245).wrapping_add(12345);
                (state >> 16) as u8
            })
            .collect();
        let mut compressed = Vec::new();

        {
            let mut encoder =
                FastLzma2Encoder::new(&mut compressed, &FastLzma2Options::with_level(3));
            encoder.write_all(&data).unwrap();
            let _ = encoder.try_finish().unwrap();
        }

        // Should use uncompressed format since random data doesn't compress
        let ctrl = compressed[0];
        assert!(
            ctrl == 0x01 || ctrl == 0x02,
            "Expected uncompressed format for random data"
        );

        // Decompress and verify
        let mut decompressed = Vec::new();
        let mut decoder =
            lzma_rust2::Lzma2Reader::new(std::io::Cursor::new(&compressed), 8 * 1024 * 1024, None);
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();

        assert_eq!(decompressed, data);
    }

    #[test]
    fn test_fast_lzma2_encoder_single_chunk() {
        // Test with 60KB - less than one chunk (64KB)
        let data: Vec<u8> = (0..60_000u32).map(|i| (i % 256) as u8).collect();
        let mut compressed = Vec::new();

        {
            let mut encoder =
                FastLzma2Encoder::new(&mut compressed, &FastLzma2Options::with_level(3));
            encoder.write_all(&data).unwrap();
            let _ = encoder.try_finish().unwrap();
        }

        // Decompress and verify
        let mut decompressed = Vec::new();
        let mut decoder = lzma_rust2::Lzma2Reader::new(
            std::io::Cursor::new(&compressed),
            8 * 1024 * 1024, // 8MB dict
            None,
        );
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();

        assert_eq!(decompressed, data);
    }

    #[test]
    fn test_fast_lzma2_encoder_medium_data() {
        // Test with 100KB - larger than one chunk (64KB)
        let data: Vec<u8> = (0..100_000u32).map(|i| (i % 256) as u8).collect();
        let mut compressed = Vec::new();

        {
            let mut encoder =
                FastLzma2Encoder::new(&mut compressed, &FastLzma2Options::with_level(3));
            encoder.write_all(&data).unwrap();
            let _ = encoder.try_finish().unwrap();
        }

        // Decompress and verify
        let mut decompressed = Vec::new();
        let mut decoder = lzma_rust2::Lzma2Reader::new(
            std::io::Cursor::new(&compressed),
            8 * 1024 * 1024, // 8MB dict
            None,
        );
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();

        assert_eq!(decompressed, data);
    }

    #[test]
    fn test_fast_lzma2_encoder_large_data() {
        // Test data larger than one block (1MB)
        let data: Vec<u8> = (0..1_500_000u32).map(|i| (i % 256) as u8).collect();
        let mut compressed = Vec::new();

        {
            let mut encoder =
                FastLzma2Encoder::new(&mut compressed, &FastLzma2Options::with_level(3));
            encoder.write_all(&data).unwrap();
            let _ = encoder.try_finish().unwrap();
        }

        // Decompress and verify
        let mut decompressed = Vec::new();
        let mut decoder = lzma_rust2::Lzma2Reader::new(
            std::io::Cursor::new(&compressed),
            8 * 1024 * 1024, // 8MB dict
            None,
        );
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();

        assert_eq!(decompressed, data);
    }

    #[test]
    fn test_fast_lzma2_encoder_empty_data() {
        let data = b"";
        let mut compressed = Vec::new();

        {
            let mut encoder =
                FastLzma2Encoder::new(&mut compressed, &FastLzma2Options::with_level(3));
            encoder.write_all(data).unwrap();
            let _ = encoder.try_finish().unwrap();
        }

        // Should only have end marker (0x00)
        assert_eq!(compressed, vec![0x00]);
    }

    #[test]
    fn test_match_struct() {
        let m = Match::new(100, 5);
        assert_eq!(m.offset, 100);
        assert_eq!(m.length, 5);
    }

    #[test]
    fn test_radix_match_finder_new() {
        let mf = RadixMatchFinder::new(1024 * 1024, 64);
        assert_eq!(mf.dict_size(), 1024 * 1024);
    }

    #[test]
    fn test_radix_match_finder_integration() {
        // Test that the match finder works with the encoder options
        let opts = FastLzma2Options::with_level(6);
        let mut mf = RadixMatchFinder::new(opts.dict_size as usize, 32);

        let data = b"Hello, World! Hello, World! Hello, World!";
        mf.build(data);

        // Should find a match at position 14 ("Hello, World!" repeated)
        let m = mf.get_match(data, 14);
        assert!(m.is_some());
        let m = m.unwrap();
        assert!(m.length >= 13); // "Hello, World!"
    }
}