compcol 0.5.0

A no_std collection of compression algorithms behind a uniform streaming trait, gated per-algorithm by Cargo features.
//! LZHAM (LZ, Huffman, Arithmetic, Markov) — **header parser only**.
//!
//! LZHAM is Rich Geldreich's LZMA-class codec, published at
//! <https://github.com/richgel999/lzham_codec> under the MIT licence. The
//! raw codec (`lzham_lib_decompress`) consumes an arithmetic-coded
//! bitstream produced by a multi-state Markov-chain model with dynamic
//! Huffman / polar / quasi-arithmetic sub-tables, none of which is
//! documented outside the reference C++ source. Dictionary size and
//! table-update-rate are caller-supplied parameters, not bytes in the
//! stream.
//!
//! ## What this build supports
//!
//! - The **`LZH0` container framing** emitted by `lzhamtest` (Rich's
//!   reference compress/decompress CLI). The decoder validates the magic,
//!   reads the `dict_size_log2` byte, reads the little-endian `u64`
//!   uncompressed size, and then — because the inner LZHAM bitstream is
//!   undocumented and untestable from this side — returns
//!   [`Error::Unsupported`] for the payload.
//! - The encoder permanently returns [`Error::Unsupported`].
//!
//! Implementing the inner codec faithfully would require porting ~3000
//! lines of C++ (the symbol codec, the LZ state machine with 24 transition
//! states, the table-update scheduler) into safe Rust and validating
//! against fixtures generated by the reference toolchain. That effort is
//! out of scope for this release, so only the framing layer is shipped
//! and the missing payload decoder is documented here.
//!
//! ## `LZH0` framing
//!
//! | Offset | Size | Field                                            |
//! |--------|------|--------------------------------------------------|
//! | 0      | 4    | Magic: `b"LZH0"`                                 |
//! | 4      | 1    | `dict_size_log2` (`15..=29` per `lzham.h`)       |
//! | 5      | 8    | Uncompressed size, little-endian `u64`           |
//! | 13     | …    | LZHAM compressed payload (this build: Unsupported) |
//!
//! No checksum is stored in the file; the reference CLI recomputes
//! Adler-32 on the decompressed bytes and prints it for the user. This
//! parser does not enforce a checksum it cannot see.
//!
//! ## References
//!
//! - Reference codec: <https://github.com/richgel999/lzham_codec>
//! - Public API header: <https://github.com/richgel999/lzham_codec/blob/master/include/lzham.h>
//! - `lzhamtest` CLI (source of the `LZH0` framing):
//!   <https://github.com/richgel999/lzham_codec/blob/master/lzhamtest/lzhamtest.cpp>

#![cfg_attr(docsrs, doc(cfg(feature = "lzham")))]

use crate::error::Error;
use crate::traits::{Algorithm, RawEncoder, RawProgress};

mod decoder;

pub use decoder::Decoder;

/// Marker for the LZHAM codec.
///
/// A field-less unit type: it holds no data and exists only to carry the
/// [`Algorithm`] implementation for LZHAM.
#[derive(Clone, Copy, Default, Debug)]
pub struct Lzham;

/// Hooks the LZHAM marker into the crate's [`Algorithm`] abstraction.
///
/// Both configuration types are `()`, so neither constructor has anything
/// to configure.
impl Algorithm for Lzham {
    /// Name constant for this algorithm.
    const NAME: &'static str = "lzham";

    type EncoderConfig = ();
    type DecoderConfig = ();
    type Encoder = Encoder;
    type Decoder = Decoder;

    /// Builds a fresh [`Encoder`]; the unit config is ignored.
    fn encoder_with(_config: ()) -> Self::Encoder {
        Encoder::new()
    }

    /// Builds a fresh [`Decoder`]; the unit config is ignored.
    fn decoder_with(_config: ()) -> Self::Decoder {
        Decoder::new()
    }
}

// ─── shared format constants ─────────────────────────────────────────────

/// Four-byte magic (`b"LZH0"`) of the `LZH0` container emitted by
/// `lzhamtest`.
pub(crate) const MAGIC: [u8; 4] = [b'L', b'Z', b'H', b'0'];

/// Minimum `dict_size_log2` value accepted by the LZHAM reference codec:
/// `LZHAM_MIN_DICT_SIZE_LOG2` = 15 (32 KiB), from `include/lzham.h`.
pub(crate) const MIN_DICT_LOG2: u8 = 15;
/// Maximum `dict_size_log2` value accepted by the LZHAM reference codec:
/// `LZHAM_MAX_DICT_SIZE_LOG2_X64` = 29 (512 MiB), from `include/lzham.h`.
/// We accept the full `MIN_DICT_LOG2..=MAX_DICT_LOG2` range on the parse
/// side.
pub(crate) const MAX_DICT_LOG2: u8 = 29;

/// Byte length of the fixed prefix of the `LZH0` container header: the
/// magic, the single `dict_size_log2` byte, and the `u64` uncompressed
/// size.
pub(crate) const HEADER_LEN: usize = {
    const MAGIC_BYTES: usize = 4;
    const DICT_LOG2_BYTES: usize = 1;
    const UNCOMPRESSED_SIZE_BYTES: usize = 8;
    MAGIC_BYTES + DICT_LOG2_BYTES + UNCOMPRESSED_SIZE_BYTES
};

// ─── encoder ─────────────────────────────────────────────────────────────

/// Placeholder encoder for LZHAM.
///
/// LZHAM encoding is out of scope for this build, so the type carries no
/// state; its encode and finish entry points report
/// [`Error::Unsupported`].
#[derive(Default, Debug)]
pub struct Encoder;

impl Encoder {
    /// Returns the stateless encoder stub. Usable in `const` contexts.
    pub const fn new() -> Encoder {
        Encoder
    }
}

impl RawEncoder for Encoder {
    fn raw_encode(&mut self, _input: &[u8], _output: &mut [u8]) -> Result<RawProgress, Error> {
        Err(Error::Unsupported)
    }
    fn raw_finish(&mut self, _output: &mut [u8]) -> Result<RawProgress, Error> {
        Err(Error::Unsupported)
    }
    fn raw_reset(&mut self) {}
}