compcol 0.1.0

A no_std collection of compression algorithms behind a uniform streaming trait, gated per-algorithm by Cargo features.
Documentation
//! Streaming round-trip tests for the LZ4 block-format algorithm.
//!
//! Tests run under the `std` test harness but the library itself is `no_std`.

#![cfg(feature = "lz4")]

use compcol::lz4::{Decoder, Encoder, Lz4};
use compcol::{Algorithm, Decoder as _, Encoder as _};

/// Encode `input` through the streaming trait using the supplied chunk sizes.
///
/// `input` is fed to a fresh `Encoder` in slices of at most `in_chunk` bytes,
/// and every call writes into a scratch buffer of `out_chunk` bytes. Both
/// sizes are clamped to at least 1, so a zero chunk size can neither stall the
/// walk over a non-empty input nor produce a zero-length output buffer.
///
/// Returns all bytes produced by `encode` followed by those produced by
/// `finish`.
///
/// # Panics
///
/// Panics if the encoder returns an error, if a call makes no progress
/// (nothing consumed and nothing written), or if the encoder reports consuming
/// more bytes than it was handed.
fn encode_chunked(input: &[u8], in_chunk: usize, out_chunk: usize) -> Vec<u8> {
    let mut enc = Encoder::new();
    let mut encoded = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];

    // `chunks` panics on a zero chunk size, so clamp to 1.
    for chunk in input.chunks(in_chunk.max(1)) {
        // Keep offering the unconsumed tail of this chunk until it is gone.
        let mut remaining = chunk;
        while !remaining.is_empty() {
            let p = enc.encode(remaining, &mut buf).unwrap();
            encoded.extend_from_slice(&buf[..p.written]);
            assert!(
                p.consumed != 0 || p.written != 0,
                "encoder stalled mid-input"
            );
            remaining = &remaining[p.consumed..];
        }
    }

    // Drain whatever the encoder still holds until it reports completion.
    loop {
        let p = enc.finish(&mut buf).unwrap();
        encoded.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        assert!(p.written != 0, "encoder finish stalled");
    }

    encoded
}

/// Decode `encoded` through the streaming trait using the supplied chunk sizes.
///
/// `encoded` is fed to a fresh `Decoder` in slices of at most `in_chunk`
/// bytes, and every call writes into a scratch buffer of `out_chunk` bytes.
/// Both sizes are clamped to at least 1, so a zero chunk size can neither
/// stall the walk over a non-empty input nor produce a zero-length output
/// buffer.
///
/// Returns all bytes produced by `decode` followed by those produced by
/// `finish`.
///
/// # Panics
///
/// Panics if the decoder returns an error, if a call makes no progress
/// (nothing consumed and nothing written), or if the decoder reports consuming
/// more bytes than it was handed.
fn decode_chunked(encoded: &[u8], in_chunk: usize, out_chunk: usize) -> Vec<u8> {
    let mut dec = Decoder::new();
    let mut decoded = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];

    // `chunks` panics on a zero chunk size, so clamp to 1.
    for chunk in encoded.chunks(in_chunk.max(1)) {
        // Keep offering the unconsumed tail of this chunk until it is gone.
        let mut remaining = chunk;
        while !remaining.is_empty() {
            let p = dec.decode(remaining, &mut buf).unwrap();
            decoded.extend_from_slice(&buf[..p.written]);
            assert!(
                p.consumed != 0 || p.written != 0,
                "decoder stalled mid-input"
            );
            remaining = &remaining[p.consumed..];
        }
    }

    // Drain whatever the decoder still holds until it reports completion.
    loop {
        let p = dec.finish(&mut buf).unwrap();
        decoded.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        assert!(p.written != 0, "decoder finish stalled");
    }

    decoded
}

/// Encode `input`, decode the result, and assert that the output equals `input`.
///
/// `in_chunk` and `out_chunk` are clamped to at least 1 once, up front, so the
/// encode and decode passes are driven with identical, non-zero chunk sizes.
///
/// # Panics
///
/// Panics if the decoded length or content differs from `input`, or if either
/// helper panics.
fn round_trip_with_chunks(input: &[u8], in_chunk: usize, out_chunk: usize) {
    let in_chunk = in_chunk.max(1);
    let out_chunk = out_chunk.max(1);

    let encoded = encode_chunked(input, in_chunk, out_chunk);
    let decoded = decode_chunked(&encoded, in_chunk, out_chunk);

    // Length first: a size mismatch gives a far shorter failure message than
    // a dump of two large byte vectors.
    assert_eq!(decoded.len(), input.len(), "round-trip length mismatch");
    assert_eq!(decoded, input, "round-trip content mismatch");
}

/// Round-trip `input` with one generously sized chunk for both input and output.
fn round_trip(input: &[u8]) {
    // Twice the input length, but never below 1 KiB; the whole input always
    // fits in a single input chunk, and the output buffer is intended to be
    // big enough for a single call as well.
    let chunk = usize::max(1024, input.len().saturating_mul(2));
    round_trip_with_chunks(input, chunk, chunk);
}

/// The `Algorithm` implementation for `Lz4` must report the name `"lz4"`.
#[test]
fn name_is_lz4() {
    let name = <Lz4 as Algorithm>::NAME;
    assert_eq!(name, "lz4");
}

/// A zero-length input must survive the round trip.
#[test]
fn empty_input() {
    let nothing: &[u8] = &[];
    round_trip(nothing);
}

/// A one-byte input must survive the round trip.
#[test]
fn single_byte() {
    let input = [0x42u8];
    round_trip(&input);
}

/// A short ASCII string must survive the round trip.
#[test]
fn short_input() {
    let input = b"hello, world";
    round_trip(input);
}

/// Round-trips an 11-byte input, one byte short of LZ4's MFLIMIT of 12.
#[test]
fn just_under_mflimit() {
    // At this size the encoder is expected to skip match searching and emit
    // the whole input via its last-literals fast path.
    let input = b"hello world";
    round_trip(input);
}

/// Round-trips a 13-byte input, one byte past LZ4's MFLIMIT of 12.
#[test]
fn just_above_mflimit() {
    // Long enough that the matcher is expected to run, even though this
    // payload contains nothing for it to match.
    let input = b"hello world!!";
    round_trip(input);
}

/// Round-trips 10 KiB consisting of a single repeated byte.
#[test]
fn long_run_of_one_byte() {
    // A long single-byte run targets the LZ77 overlapping-match case, where
    // the copy length is larger than the match offset.
    const LEN: usize = 10 * 1024;
    let input = [b'Z'; LEN];
    round_trip(&input);
}

/// Round-trips a bit over 80 KiB of repeated English text and checks that it
/// compresses to less than half its size.
#[test]
fn ascii_text_exceeding_64kib() {
    const SENTENCE: &[u8] = b"the quick brown fox jumps over the lazy dog. ";
    // Comfortably beyond the canonical 64 KiB block size, so the streaming
    // wrapper is expected to split the input over several blocks.
    const TARGET_LEN: usize = 80 * 1024;

    // Whole sentences are appended, so the final length may overshoot the
    // target by up to one sentence; reserve for that up front.
    let mut input = Vec::with_capacity(TARGET_LEN + SENTENCE.len());
    while input.len() < TARGET_LEN {
        input.extend_from_slice(SENTENCE);
    }
    round_trip(&input);

    // Sanity check on the ratio: highly repetitive text that does not shrink
    // would mean the matcher silently degraded to the all-literals path.
    let encoded = encode_chunked(&input, input.len(), input.len() * 2);
    let (encoded_len, input_len) = (encoded.len(), input.len());
    assert!(
        encoded_len < input_len / 2,
        "encoded size {} not less than half the input size {}",
        encoded_len,
        input_len
    );
}

/// Round-trips 8 KiB of deterministic pseudo-random bytes.
#[test]
fn pseudo_random_input() {
    // A small fixed-seed linear congruential generator keeps the test
    // reproducible without pulling in a dependency.
    let mut state: u32 = 0xC0FFEE;
    let input: Vec<u8> = (0..8192)
        .map(|_| {
            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
            // Deliberately truncating: keep only bits 16..24 of the state.
            (state >> 16) as u8
        })
        .collect();
    round_trip(&input);
}

/// Round-trips 512 bytes using 1-byte input chunks and 1-byte output buffers
/// for both the encoder and the decoder.
#[test]
fn chunked_one_byte_at_a_time() {
    // The byte pattern 0, 1, ..., 6 repeated until 512 bytes are produced.
    let input: Vec<u8> = (0u8..7).cycle().take(512).collect();

    let encoded = encode_chunked(&input, 1, 1);
    let decoded = decode_chunked(&encoded, 1, 1);

    assert_eq!(decoded, input);
}

/// Round-trips a highly compressible payload using 1-byte input chunks and
/// 1-byte output buffers on both sides.
#[test]
fn chunked_one_byte_at_a_time_repetitive() {
    // 256 copies of an 8-byte pattern (2048 bytes), so the payload really
    // does contain matches that must survive the most fragmented buffering.
    let input = b"abcdefgh".repeat(256);

    let encoded = encode_chunked(&input, 1, 1);
    let decoded = decode_chunked(&encoded, 1, 1);

    assert_eq!(decoded, input);
}

/// Round-trips just over 128 KiB using chunk sizes that do not line up with
/// the 64 KiB block boundaries.
#[test]
fn chunked_at_block_boundary() {
    const SENTENCE: &[u8] = b"compcol streaming test payload - repeat me. ";
    // Large enough for the payload to cross two 64 KiB boundaries.
    const TARGET_LEN: usize = 128 * 1024;
    // Neither size divides 64 KiB, so chunk edges and block edges never align.
    const IN_CHUNK: usize = 7919;
    const OUT_CHUNK: usize = 8191;

    let mut input = Vec::with_capacity(TARGET_LEN + SENTENCE.len());
    while input.len() < TARGET_LEN {
        input.extend_from_slice(SENTENCE);
    }

    let encoded = encode_chunked(&input, IN_CHUNK, OUT_CHUNK);
    let decoded = decode_chunked(&encoded, IN_CHUNK, OUT_CHUNK);

    assert_eq!(decoded.len(), input.len());
    assert_eq!(decoded, input);
}

/// After `reset`, an encoder that was already fed data must produce a stream
/// that decodes to only the data supplied after the reset.
#[test]
fn reset_clears_state() {
    let mut scratch = [0u8; 256];
    let mut enc = Encoder::new();

    // Feed some input whose output is ignored, then reset the encoder.
    let _ = enc
        .encode(b"first run, will be discarded", &mut scratch)
        .unwrap();
    enc.reset();

    // Encode a second payload from the reset encoder and collect its output.
    let mut produced = Vec::new();
    let step = enc.encode(b"second run", &mut scratch).unwrap();
    produced.extend_from_slice(&scratch[..step.written]);
    loop {
        let step = enc.finish(&mut scratch).unwrap();
        produced.extend_from_slice(&scratch[..step.written]);
        if step.done {
            break;
        }
        assert!(step.written != 0, "finish stalled");
    }

    // Decode with a fresh decoder: one `decode` call followed by one
    // `finish` call, which must already report completion.
    let mut dec = Decoder::new();
    let mut decoded = Vec::new();
    let step = dec.decode(&produced, &mut scratch).unwrap();
    decoded.extend_from_slice(&scratch[..step.written]);
    let last = dec.finish(&mut scratch).unwrap();
    decoded.extend_from_slice(&scratch[..last.written]);

    assert!(last.done);
    assert_eq!(decoded, b"second run");
}

/// A match with offset 0 must be rejected with `Error::InvalidDistance`.
#[test]
fn decoder_rejects_zero_offset() {
    // Hand-built invalid block: token 0x00 (0 literals, match-length excess 0,
    // i.e. a 4-byte match) followed by a little-endian offset of 0.
    let block: [u8; 3] = [0x00, 0x00, 0x00];

    // Framing used by this test: u32_le(block length) || block || u32_le(0).
    // The trailing zero is the terminator, which should never be reached.
    let mut framed = Vec::with_capacity(11);
    framed.extend_from_slice(&3u32.to_le_bytes());
    framed.extend_from_slice(&block);
    framed.extend_from_slice(&0u32.to_le_bytes());

    let mut out = [0u8; 32];
    let mut dec = Decoder::new();
    let err = dec.decode(&framed, &mut out).unwrap_err();
    assert_eq!(err, compcol::Error::InvalidDistance);
}