atlas-archive-core 1.1.0

High-performance compression library with adaptive context modeling (Loom) and .nyx archives
Documentation
//! Run-Length Encoding (RLE) for Atlas preprocessing.
//! Uses escape-byte encoding to avoid ambiguity.
//! Format: 0xFF is escape byte
//!   - 0xFF 0x00 = literal 0xFF
//!   - 0xFF [count] [byte] where count is 1..=255 means (count + 3) copies of byte
//!   (so runs of 4 to 258 identical bytes become one triple; longer runs are split)

use crate::alloc::vec::Vec;

const RLE_ESCAPE: u8 = 0xFF;

/// Run-length encodes `data` using `0xFF` as the escape byte.
///
/// The output is built from three kinds of records:
/// - a run of 4 to 258 identical bytes becomes `0xFF, run - 3, byte`;
/// - a literal `0xFF` that is not part of such a run becomes `0xFF, 0x00`;
/// - every other byte is copied through unchanged.
///
/// Runs longer than 258 bytes are split into several records. An empty
/// input produces an empty output.
pub fn rle_encode(data: &[u8]) -> Vec<u8> {
    // Longest run a single triple can describe: the count byte holds
    // `run - 3`, and a u8 tops out at 255.
    const MAX_RUN: usize = 258;
    // Shortest run written as a triple. The count byte stores `run - 3`
    // and the value 0 is reserved for the escaped-literal marker.
    const MIN_RUN: usize = 4;

    let mut encoded = Vec::with_capacity(data.len());
    let mut pos = 0;

    while let Some(&current) = data.get(pos) {
        // Length of the run starting at `pos`; always at least 1 because
        // the first element compared is `current` itself.
        let run = data[pos..]
            .iter()
            .take(MAX_RUN)
            .take_while(|&&next| next == current)
            .count();

        if run < MIN_RUN {
            if current == RLE_ESCAPE {
                // Each literal 0xFF must be escaped individually.
                for _ in 0..run {
                    encoded.extend_from_slice(&[RLE_ESCAPE, 0x00]);
                }
            } else {
                encoded.resize(encoded.len() + run, current);
            }
        } else {
            // `run` is within 4..=258 here, so `run - 3` is 1..=255 and the
            // cast cannot truncate.
            encoded.extend_from_slice(&[RLE_ESCAPE, (run - 3) as u8, current]);
        }

        pos += run;
    }

    encoded
}

/// Decodes escape-byte RLE data as produced by [`rle_encode`].
///
/// The input is scanned left to right:
/// - any byte other than `0xFF` is copied through unchanged;
/// - `0xFF, 0x00` yields a single literal `0xFF`;
/// - `0xFF, code, byte` with `code >= 1` yields `code + 3` copies of `byte`.
///
/// Truncated input is decoded on a best-effort basis instead of being
/// rejected: a trailing lone `0xFF` is emitted as-is, and a trailing
/// `0xFF, code` pair with no run byte after it is emitted as those two raw
/// bytes.
///
/// This function performs no I/O; it only builds and returns the decoded
/// buffer.
pub fn rle_decode(data: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(data.len());
    let mut i = 0;

    while i < data.len() {
        let current = data[i];
        if current != RLE_ESCAPE {
            out.push(current);
            i += 1;
            continue;
        }

        // Look at the (up to) two bytes following the escape marker.
        match (data.get(i + 1).copied(), data.get(i + 2).copied()) {
            // Escape marker is the last byte: pass it through.
            (None, _) => {
                out.push(RLE_ESCAPE);
                i += 1;
            }
            // Escaped literal 0xFF.
            (Some(0x00), _) => {
                out.push(RLE_ESCAPE);
                i += 2;
            }
            // Run header without its byte: pass both raw bytes through.
            (Some(code), None) => {
                out.push(RLE_ESCAPE);
                out.push(code);
                i += 2;
            }
            // Complete run record: the count byte stores `run length - 3`.
            (Some(code), Some(byte)) => {
                let run_len = usize::from(code) + 3;
                out.resize(out.len() + run_len, byte);
                i += 3;
            }
        }
    }

    out
}