minlz 1.1.0 - Docs.rs

// Copyright 2024 Karpeles Lab Inc.
// Based on the S2 compression format by Klaus Post
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

use crate::constants::*;
use crate::dict::{Dict, MAX_DICT_SRC_OFFSET};
use crate::error::{Error, Result};
use crate::varint::decode_varint;

/// Decoder for S2 and Snappy compressed blocks.
///
/// The value only records which input formats the caller asked for; build
/// one with [`Decoder::new`] or [`Decoder::new_s2_only`].
#[derive(Debug, Clone)]
pub struct Decoder {
    /// Whether Snappy-format input (no repeat offsets) is accepted.
    // Nothing visible next to this definition reads the flag, hence the
    // lint allowance.
    #[allow(dead_code)]
    allow_snappy: bool,
}

impl Decoder {
    /// Create a new decoder that accepts both S2 and Snappy formats
    pub fn new() -> Self {
        Decoder { allow_snappy: true }
    }

    /// Create a decoder that only accepts S2 format
    pub fn new_s2_only() -> Self {
        Decoder {
            allow_snappy: false,
        }
    }
}

impl Default for Decoder {
    fn default() -> Self {
        Self::new()
    }
}

/// Decode a complete block and return the uncompressed bytes.
///
/// `src` must start with the varint length header understood by
/// [`decode_len`]; everything after the header is handed to the block
/// decoder. The output is always a freshly allocated `Vec<u8>` whose length
/// equals the length announced in the header.
///
/// This function accepts both S2 and Snappy format.
///
/// # Errors
///
/// Propagates any error from parsing the header, from allocating the
/// output buffer (see [`MAX_DECODE_DST_SIZE`]), or from decoding the
/// payload.
pub fn decode(src: &[u8]) -> Result<Vec<u8>> {
    let (decoded_len, header_len) = decode_len(src)?;

    // Allocate first, slice second: same evaluation order as the error
    // paths callers may already rely on.
    let mut out = alloc_uninit_dst(decoded_len)?;
    let payload = &src[header_len..];

    s2_decode(&mut out, payload)?;
    Ok(out)
}

/// Decode Snappy format data.
///
/// This is an alias for [`decode`]: the call is forwarded unchanged, since
/// the S2 decoder handles Snappy format as well. The return value and the
/// errors are exactly those of [`decode`].
pub fn decode_snappy(src: &[u8]) -> Result<Vec<u8>> {
    decode(src)
}

/// Decode a block that was compressed against a dictionary.
///
/// Works like [`decode`], except that the payload is decoded with `dict`
/// available: copy operations near the start of the output may reference
/// dictionary bytes instead of already-decoded output.
///
/// # Errors
///
/// Propagates any error from parsing the length header, from allocating
/// the output buffer, or from decoding the payload.
pub fn decode_with_dict(src: &[u8], dict: &Dict) -> Result<Vec<u8>> {
    let (decoded_len, header_len) = decode_len(src)?;

    let mut out = alloc_uninit_dst(decoded_len)?;
    let payload = &src[header_len..];

    s2_decode_dict(&mut out, payload, dict)?;
    Ok(out)
}

/// Largest decoded length the block-format decoder is willing to allocate.
///
/// The length header of an S2 block is a varint that can announce up to
/// 2^32 − 1 bytes (≈ 4 GiB). Trusting it blindly would let a 5-byte
/// adversarial input demand a multi-gigabyte buffer. On Linux `malloc`
/// hands out virtual address space without committing physical memory, so
/// `try_reserve_exact` alone is not a reliable defence: it succeeds and the
/// later writes page-fault. Under libFuzzer the malloc interceptor aborts
/// the process once such a request crosses its rss limit.
///
/// 256 MiB is far beyond any realistic single block (the stream format
/// already limits blocks to `MAX_BLOCK_SIZE` = 4 MiB) while staying well
/// under libFuzzer's 2 GiB default. Callers with arbitrarily large payloads
/// should use the stream [`Reader`](crate::Reader) API instead, which
/// handles one capped block at a time.
pub const MAX_DECODE_DST_SIZE: usize = 256 << 20;

/// Allocate a `Vec<u8>` of length `n` whose contents are *uninitialized*.
///
/// The S2 decoder fills the destination front to back, writes every byte
/// in 0..len before it returns Ok, and only reads positions it has already
/// written. That lets this function skip the zero-fill `vec![0u8; n]` would
/// perform — a memset that is the single largest cost on the decode path
/// for small/medium blocks (≈ 80 % of cycles).
///
/// Adversarial length headers are handled in two steps:
///   1. Anything above [`MAX_DECODE_DST_SIZE`] is refused before touching
///      the allocator.
///   2. `try_reserve_exact` reports the remaining failures, which are
///      mapped to [`Error::TooLarge`].
///
/// When decoding fails, the partly written Vec is simply dropped. That is
/// fine because `u8` has no Drop and nothing reads the unwritten part.
#[inline]
fn alloc_uninit_dst(n: usize) -> Result<Vec<u8>> {
    // Layer 1: hard cap.
    if n > MAX_DECODE_DST_SIZE {
        return Err(Error::TooLarge);
    }

    // Layer 2: fallible reservation instead of an aborting one.
    let mut buf = Vec::<u8>::new();
    if buf.try_reserve_exact(n).is_err() {
        return Err(Error::TooLarge);
    }

    // SAFETY: the reservation above guarantees capacity ≥ `n`. Per the
    // contract described in the doc comment, the decoder initializes all
    // of 0..n before any read, and its writes go through
    // copy_from_slice / copy_within, so no uninitialized byte is read.
    // NOTE(review): handing out `&mut [u8]` over uninitialized bytes is the
    // pattern `clippy::uninit_vec` warns about — worth confirming under
    // Miri whenever the decoder's write pattern changes.
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(n);
    }

    Ok(buf)
}

/// Decode into a pre-allocated destination buffer.
///
/// `dst` must be at least `decode_len(src)?.0` bytes; otherwise
/// returns [`Error::BufferTooSmall`]. The returned `usize` is the
/// number of bytes written into `dst`.
///
/// Useful for hot loops that decode into a reusable buffer without
/// allocating a fresh `Vec` per call.
pub fn decode_into(dst: &mut [u8], src: &[u8]) -> Result<usize> {
    let (dlen, header_len) = decode_len(src)?;

    if dst.len() < dlen {
        return Err(Error::BufferTooSmall);
    }

    s2_decode(&mut dst[..dlen], &src[header_len..])?;

    Ok(dlen)
}

/// Returns the length of the decoded block and the number of bytes
/// that the length header occupied.
pub fn decode_len(src: &[u8]) -> Result<(usize, usize)> {
    let (v, n) = decode_varint(src)?;

    if v > 0xffffffff {
        return Err(Error::Corrupt);
    }

    // Check for 32-bit overflow on 32-bit systems
    #[cfg(target_pointer_width = "32")]
    {
        if v > 0x7fffffff {
            return Err(Error::TooLarge);
        }
    }

    Ok((v as usize, n))
}

/// Core S2 decoding function
fn s2_decode(dst: &mut [u8], src: &[u8]) -> Result<()> {
    let mut d = 0; // destination index
    let mut s = 0; // source index
    let mut offset = 0; // last copy offset

    // Fast path: process as long as we can read at least 5 bytes
    while s < src.len().saturating_sub(5) {
        let tag = src[s] & 0x03;

        match tag {
            TAG_LITERAL => {
                let (length, bytes_consumed) = decode_literal_length(&src[s..])?;
                s += bytes_consumed;

                // Bounds check
                if length > dst.len() - d || length > src.len() - s {
                    return Err(Error::Corrupt);
                }

                // Copy literal bytes
                dst[d..d + length].copy_from_slice(&src[s..s + length]);
                d += length;
                s += length;
            }
            TAG_COPY1 => {
                let (new_offset, length, bytes_consumed) = decode_copy1(&src[s..], offset)?;
                s += bytes_consumed;
                offset = new_offset;

                // Bounds check
                if offset == 0 || d < offset || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from earlier in dst
                copy_within(dst, d, offset, length);
                d += length;
            }
            TAG_COPY2 => {
                if s + 3 > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u16::from_le_bytes(src[s + 1..s + 3].try_into().unwrap()) as usize;
                let length = 1 + ((src[s] >> 2) as usize);
                s += 3;

                // Bounds check
                if offset == 0 || d < offset || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from earlier in dst
                copy_within(dst, d, offset, length);
                d += length;
            }
            TAG_COPY4 => {
                if s + 5 > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u32::from_le_bytes(src[s + 1..s + 5].try_into().unwrap()) as usize;
                let length = 1 + ((src[s] >> 2) as usize);
                s += 5;

                // Bounds check
                if offset == 0 || d < offset || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from earlier in dst
                copy_within(dst, d, offset, length);
                d += length;
            }
            _ => unreachable!(),
        }
    }

    // Slow path: process remaining bytes with extra bounds checking
    while s < src.len() {
        let tag = src[s] & 0x03;

        match tag {
            TAG_LITERAL => {
                let (length, bytes_consumed) = decode_literal_length(&src[s..])?;
                s += bytes_consumed;

                // Bounds check
                if s > src.len() || length > dst.len() - d || length > src.len() - s {
                    return Err(Error::Corrupt);
                }

                // Copy literal bytes
                dst[d..d + length].copy_from_slice(&src[s..s + length]);
                d += length;
                s += length;
            }
            TAG_COPY1 => {
                let (new_offset, length, bytes_consumed) = decode_copy1(&src[s..], offset)?;
                s += bytes_consumed;

                if s > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = new_offset;

                // Bounds check
                if offset == 0 || d < offset || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from earlier in dst
                copy_within(dst, d, offset, length);
                d += length;
            }
            TAG_COPY2 => {
                s += 3;
                if s > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u16::from_le_bytes(src[s - 2..s].try_into().unwrap()) as usize;
                let length = 1 + ((src[s - 3] >> 2) as usize);

                // Bounds check
                if offset == 0 || d < offset || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from earlier in dst
                copy_within(dst, d, offset, length);
                d += length;
            }
            TAG_COPY4 => {
                s += 5;
                if s > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u32::from_le_bytes(src[s - 4..s].try_into().unwrap()) as usize;
                let length = 1 + ((src[s - 5] >> 2) as usize);

                // Bounds check
                if offset == 0 || d < offset || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from earlier in dst
                copy_within(dst, d, offset, length);
                d += length;
            }
            _ => unreachable!(),
        }
    }

    // Verify we decoded exactly the right amount
    if d != dst.len() {
        return Err(Error::Corrupt);
    }

    Ok(())
}

/// Core S2 decoding function with dictionary support
fn s2_decode_dict(dst: &mut [u8], src: &[u8], dict: &Dict) -> Result<()> {
    let mut d = 0; // destination index
    let mut s = 0; // source index
    let mut offset = dict.data().len() - dict.repeat(); // Initialize with dictionary repeat offset

    // Fast path: process as long as we can read at least 5 bytes
    while s < src.len().saturating_sub(5) {
        let tag = src[s] & 0x03;

        match tag {
            TAG_LITERAL => {
                let (length, bytes_consumed) = decode_literal_length(&src[s..])?;
                s += bytes_consumed;

                // Bounds check
                if length > dst.len() - d || length > src.len() - s {
                    return Err(Error::Corrupt);
                }

                // Copy literal bytes
                dst[d..d + length].copy_from_slice(&src[s..s + length]);
                d += length;
                s += length;
            }
            TAG_COPY1 => {
                let (new_offset, length, bytes_consumed) = decode_copy1(&src[s..], offset)?;
                s += bytes_consumed;
                offset = new_offset;

                // Bounds check
                if offset == 0 || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from dictionary if needed
                if d < offset {
                    // Copying from dictionary
                    if d > MAX_DICT_SRC_OFFSET {
                        return Err(Error::Corrupt);
                    }

                    let dict_start = dict.data().len() - offset + d;
                    if dict_start + length > dict.data().len() {
                        return Err(Error::Corrupt);
                    }

                    dst[d..d + length]
                        .copy_from_slice(&dict.data()[dict_start..dict_start + length]);
                } else {
                    // Copy from earlier in dst
                    copy_within(dst, d, offset, length);
                }
                d += length;
            }
            TAG_COPY2 => {
                if s + 3 > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u16::from_le_bytes(src[s + 1..s + 3].try_into().unwrap()) as usize;
                let length = 1 + ((src[s] >> 2) as usize);
                s += 3;

                // Bounds check
                if offset == 0 || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from dictionary if needed
                if d < offset {
                    if d > MAX_DICT_SRC_OFFSET {
                        return Err(Error::Corrupt);
                    }

                    let dict_start = dict.data().len() - offset + d;
                    if dict_start + length > dict.data().len() {
                        return Err(Error::Corrupt);
                    }

                    dst[d..d + length]
                        .copy_from_slice(&dict.data()[dict_start..dict_start + length]);
                } else {
                    copy_within(dst, d, offset, length);
                }
                d += length;
            }
            TAG_COPY4 => {
                if s + 5 > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u32::from_le_bytes(src[s + 1..s + 5].try_into().unwrap()) as usize;
                let length = 1 + ((src[s] >> 2) as usize);
                s += 5;

                // Bounds check
                if offset == 0 || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from dictionary if needed
                if d < offset {
                    if d > MAX_DICT_SRC_OFFSET {
                        return Err(Error::Corrupt);
                    }

                    let dict_start = dict.data().len() - offset + d;
                    if dict_start + length > dict.data().len() {
                        return Err(Error::Corrupt);
                    }

                    dst[d..d + length]
                        .copy_from_slice(&dict.data()[dict_start..dict_start + length]);
                } else {
                    copy_within(dst, d, offset, length);
                }
                d += length;
            }
            _ => unreachable!(),
        }
    }

    // Slow path: process remaining bytes with extra bounds checking
    while s < src.len() {
        let tag = src[s] & 0x03;

        match tag {
            TAG_LITERAL => {
                let (length, bytes_consumed) = decode_literal_length(&src[s..])?;
                s += bytes_consumed;

                // Bounds check
                if s > src.len() || length > dst.len() - d || length > src.len() - s {
                    return Err(Error::Corrupt);
                }

                // Copy literal bytes
                dst[d..d + length].copy_from_slice(&src[s..s + length]);
                d += length;
                s += length;
            }
            TAG_COPY1 => {
                let (new_offset, length, bytes_consumed) = decode_copy1(&src[s..], offset)?;
                s += bytes_consumed;

                if s > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = new_offset;

                // Bounds check
                if offset == 0 || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from dictionary if needed
                if d < offset {
                    if d > MAX_DICT_SRC_OFFSET {
                        return Err(Error::Corrupt);
                    }

                    let dict_start = dict.data().len() - offset + d;
                    if dict_start + length > dict.data().len() {
                        return Err(Error::Corrupt);
                    }

                    dst[d..d + length]
                        .copy_from_slice(&dict.data()[dict_start..dict_start + length]);
                } else {
                    copy_within(dst, d, offset, length);
                }
                d += length;
            }
            TAG_COPY2 => {
                s += 3;
                if s > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u16::from_le_bytes(src[s - 2..s].try_into().unwrap()) as usize;
                let length = 1 + ((src[s - 3] >> 2) as usize);

                // Bounds check
                if offset == 0 || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from dictionary if needed
                if d < offset {
                    if d > MAX_DICT_SRC_OFFSET {
                        return Err(Error::Corrupt);
                    }

                    let dict_start = dict.data().len() - offset + d;
                    if dict_start + length > dict.data().len() {
                        return Err(Error::Corrupt);
                    }

                    dst[d..d + length]
                        .copy_from_slice(&dict.data()[dict_start..dict_start + length]);
                } else {
                    copy_within(dst, d, offset, length);
                }
                d += length;
            }
            TAG_COPY4 => {
                s += 5;
                if s > src.len() {
                    return Err(Error::Corrupt);
                }

                offset = u32::from_le_bytes(src[s - 4..s].try_into().unwrap()) as usize;
                let length = 1 + ((src[s - 5] >> 2) as usize);

                // Bounds check
                if offset == 0 || length > dst.len() - d {
                    return Err(Error::Corrupt);
                }

                // Copy from dictionary if needed
                if d < offset {
                    if d > MAX_DICT_SRC_OFFSET {
                        return Err(Error::Corrupt);
                    }

                    let dict_start = dict.data().len() - offset + d;
                    if dict_start + length > dict.data().len() {
                        return Err(Error::Corrupt);
                    }

                    dst[d..d + length]
                        .copy_from_slice(&dict.data()[dict_start..dict_start + length]);
                } else {
                    copy_within(dst, d, offset, length);
                }
                d += length;
            }
            _ => unreachable!(),
        }
    }

    // Verify we decoded exactly the right amount
    if d != dst.len() {
        return Err(Error::Corrupt);
    }

    Ok(())
}

/// Decode the length of a literal chunk
/// Returns (length, bytes_consumed)
fn decode_literal_length(src: &[u8]) -> Result<(usize, usize)> {
    let x = (src[0] >> 2) as u32;

    match x {
        0..=59 => Ok((x as usize + 1, 1)),
        60 => {
            if src.len() < 2 {
                return Err(Error::Corrupt);
            }
            Ok((src[1] as usize + 1, 2))
        }
        61 => {
            if src.len() < 3 {
                return Err(Error::Corrupt);
            }
            let len = u16::from_le_bytes(src[1..3].try_into().unwrap()) as usize;
            Ok((len + 1, 3))
        }
        62 => {
            if src.len() < 4 {
                return Err(Error::Corrupt);
            }
            // Read 4 bytes starting at src[0] and shift out the tag byte
            // — this turns 3 indexed reads into one unaligned word load.
            let len = (u32::from_le_bytes(src[0..4].try_into().unwrap()) >> 8) as usize;
            Ok((len + 1, 4))
        }
        63 => {
            if src.len() < 5 {
                return Err(Error::Corrupt);
            }
            let len = u32::from_le_bytes(src[1..5].try_into().unwrap()) as usize;
            Ok((len + 1, 5))
        }
        _ => Err(Error::Corrupt),
    }
}

/// Decode a COPY1 tag
/// Returns (offset, length, bytes_consumed)
fn decode_copy1(src: &[u8], last_offset: usize) -> Result<(usize, usize, usize)> {
    if src.len() < 2 {
        return Err(Error::Corrupt);
    }

    let toffset = ((src[0] as usize & 0xe0) << 3) | (src[1] as usize);
    let mut length = ((src[0] >> 2) & 0x7) as usize;

    if toffset == 0 {
        // Repeat offset - special encoding for length
        match length {
            5 => {
                if src.len() < 3 {
                    return Err(Error::Corrupt);
                }
                length = src[2] as usize + 4;
                Ok((last_offset, length + 4, 3))
            }
            6 => {
                if src.len() < 4 {
                    return Err(Error::Corrupt);
                }
                length = u16::from_le_bytes(src[2..4].try_into().unwrap()) as usize + (1 << 8);
                Ok((last_offset, length + 4, 4))
            }
            7 => {
                if src.len() < 5 {
                    return Err(Error::Corrupt);
                }
                // Read 4 bytes starting at src[1] and shift out the count byte.
                length =
                    (u32::from_le_bytes(src[1..5].try_into().unwrap()) >> 8) as usize + (1 << 16);
                Ok((last_offset, length + 4, 5))
            }
            _ => {
                // 0-4: use as-is
                Ok((last_offset, length + 4, 2))
            }
        }
    } else {
        Ok((toffset, length + 4, 2))
    }
}

/// Copy `length` bytes inside `dst` from `d - offset` to `d`, repeating the
/// source pattern when the two regions overlap.
///
/// This mimics the behavior of the Go implementation, where a copy whose
/// offset is smaller than its length repeats the last `offset` bytes
/// (RLE-style).
///
/// The caller must ensure `1 <= offset <= d` and `d + length <= dst.len()`
/// whenever `length > 0`.
#[inline]
fn copy_within(dst: &mut [u8], d: usize, offset: usize, length: usize) {
    let origin = d - offset;

    match offset {
        // Source and destination do not overlap: one memmove.
        distance if distance >= length => {
            dst.copy_within(origin..origin + length, d);
        }
        // Run of a single byte: plain fill.
        1 => {
            let byte = dst[origin];
            dst[d..d + length].fill(byte);
        }
        // Overlapping pattern with a period > 1. Everything in
        // `origin..filled` is already valid pattern data, so each round
        // can copy as many bytes as are valid so far. The valid region at
        // least doubles per round, so only O(log length) memmoves are
        // needed.
        _ => {
            let end = d + length;
            let mut filled = d;
            while filled < end {
                let span = (filled - origin).min(end - filled);
                dst.copy_within(origin..origin + span, filled);
                filled += span;
            }
        }
    }
}