// onelib 0.2.0
//
// Rust implementation of the ONEcode file format.
// Documentation
//! INT_LIST delta-encoding and byte-width compaction.
//!
//! ONEcode compresses integer lists by:
//! 1. Converting to successive differences (`data[i] -= data[i-1]`).
//! 2. Determining the minimum byte width to represent all differences.
//! 3. Packing each difference into that many bytes, discarding leading
//!    (big-endian) or trailing (little-endian) zero/sign-extension bytes.
//!
//! The first element is stored separately (as a full LTF-encoded integer)
//! and is not included in the compacted output.

/// Delta-encode an integer list and compact it to the minimum byte width.
///
/// Elements `[1..]` of `data` are replaced **in place** by the difference to
/// their predecessor; `data[0]` is left untouched (the caller writes it
/// separately). Differences are computed with wrapping arithmetic, so a pair
/// whose true difference does not fit in an `i64` (for example
/// `i64::MIN` followed by `i64::MAX`) is stored as the wrapped value instead
/// of panicking; adding the differences back with wrapping arithmetic
/// recovers the original values.
///
/// # Returns
///
/// `(compacted_bytes, byte_width)` where `byte_width` is 1–8 and is the
/// smallest number of bytes that can hold every difference as a
/// two's-complement signed integer. `compacted_bytes` contains
/// `(data.len() - 1) * byte_width` bytes: each difference truncated to
/// `byte_width` bytes, in big-endian order when `is_big_endian` is true and
/// little-endian order otherwise. This holds for every width, including the
/// full 8-byte width.
///
/// If the list has zero or one element, returns an empty vec with
/// `byte_width` 1 and leaves `data` unchanged.
pub fn compact(data: &mut [i64], is_big_endian: bool) -> (Vec<u8>, u8) {
    if data.len() <= 1 {
        return (Vec::new(), 1);
    }

    // Convert to successive differences. Walk backwards so that each element
    // still sees its predecessor's original value. Wrapping keeps debug and
    // release builds in agreement and keeps the transform reversible.
    for i in (1..data.len()).rev() {
        data[i] = data[i].wrapping_sub(data[i - 1]);
    }

    let diffs = &data[1..];

    // OR together the magnitude bits of every difference. For a negative
    // value `!diff` (== -(diff + 1)) is non-negative and has the same number
    // of significant bits as the value needs below its sign bit.
    let mask = diffs.iter().fold(0i64, |acc, &diff| {
        let magnitude = if diff < 0 { !diff } else { diff };
        acc | magnitude
    });

    // One byte covers 7 magnitude bits plus the sign bit; every further
    // 8 magnitude bits cost one more byte, up to the full 8.
    let mut byte_width: u8 = 1;
    let mut remaining = mask >> 7;
    while remaining != 0 && byte_width < 8 {
        byte_width += 1;
        remaining >>= 8;
    }

    let width = usize::from(byte_width);
    let mut out = Vec::with_capacity(diffs.len() * width);

    if is_big_endian {
        // Significant bytes are the trailing `width` bytes; drop the leading
        // sign-extension bytes. With `width == 8` nothing is dropped.
        let skip = 8 - width;
        for diff in diffs {
            out.extend_from_slice(&diff.to_be_bytes()[skip..]);
        }
    } else {
        // Significant bytes are the leading `width` bytes; drop the trailing
        // sign-extension bytes. With `width == 8` nothing is dropped.
        for diff in diffs {
            out.extend_from_slice(&diff.to_le_bytes()[..width]);
        }
    }

    (out, byte_width)
}

/// Restore a compacted, delta-encoded integer list.
///
/// `first` is the first element (stored separately). `compacted` holds the
/// remaining elements as successive differences, each packed into
/// `byte_width` bytes in two's-complement form: most significant byte first
/// when `is_big_endian` is true, least significant byte first otherwise.
/// `len` is the total number of elements in the list, including `first`.
///
/// # Returns
///
/// The reconstructed list, starting with `first`. At most `len - 1`
/// differences are decoded, so bytes in `compacted` beyond
/// `(len - 1) * byte_width` are ignored. If `compacted` holds fewer than
/// `len - 1` complete elements, the result is correspondingly shorter.
/// When `len` is 0 or 1 the result is just `[first]` and neither
/// `compacted` nor `byte_width` is inspected.
///
/// Differences are summed with wrapping arithmetic, so a difference that
/// wrapped around the `i64` range decodes to the intended value instead of
/// panicking.
///
/// # Panics
///
/// Panics if `len > 1` and `byte_width` is not in `1..=8`.
pub fn decompact(
    first: i64,
    compacted: &[u8],
    byte_width: u8,
    len: usize,
    is_big_endian: bool,
) -> Vec<i64> {
    if len <= 1 {
        return vec![first];
    }

    assert!(
        (1..=8).contains(&byte_width),
        "byte_width must be in 1..=8, got {}",
        byte_width
    );

    let width = usize::from(byte_width);
    let wanted = len - 1;

    // Size the buffer from the data actually present rather than trusting
    // `len`, which may come from an untrusted header.
    let available = compacted.len() / width;
    let mut result = Vec::with_capacity(1 + wanted.min(available));
    result.push(first);

    let mut previous = first;
    for chunk in compacted.chunks_exact(width).take(wanted) {
        // Expand the packed difference back to a full i64, filling the
        // discarded bytes with copies of the sign bit.
        let delta = if is_big_endian {
            // Most significant byte comes first; the payload occupies the
            // last `width` bytes of the 8-byte value.
            let fill = if chunk[0] & 0x80 != 0 { 0xff } else { 0x00 };
            let mut bytes = [fill; 8];
            bytes[8 - width..].copy_from_slice(chunk);
            i64::from_be_bytes(bytes)
        } else {
            // Most significant byte comes last; the payload occupies the
            // first `width` bytes of the 8-byte value.
            let fill = if chunk[width - 1] & 0x80 != 0 { 0xff } else { 0x00 };
            let mut bytes = [fill; 8];
            bytes[..width].copy_from_slice(chunk);
            i64::from_le_bytes(bytes)
        };

        // Undo the differencing as we go.
        previous = previous.wrapping_add(delta);
        result.push(previous);
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Compacts a copy of `values`, decompacts the result, asserts that the
    /// original list comes back, and returns the packed bytes together with
    /// the byte width that `compact` chose.
    fn check_round_trip(values: &[i64], big_endian: bool) -> (Vec<u8>, u8) {
        let mut working = values.to_vec();
        let (packed, width) = compact(&mut working, big_endian);
        let restored = decompact(values[0], &packed, width, values.len(), big_endian);
        assert_eq!(restored, values);
        (packed, width)
    }

    #[test]
    fn round_trip_ascending() {
        // Every difference is 10, which fits in a single byte.
        let (_, width) = check_round_trip(&[10, 20, 30, 40, 50], false);
        assert_eq!(width, 1);
    }

    #[test]
    fn round_trip_descending() {
        // Every difference is -10, which fits in a single signed byte.
        let (_, width) = check_round_trip(&[50, 40, 30, 20, 10], false);
        assert_eq!(width, 1);
    }

    #[test]
    fn round_trip_mixed() {
        // Mix of positive and negative steps; only the round trip is checked.
        check_round_trip(&[100, 200, 150, 300, 0, -50], false);
    }

    #[test]
    fn round_trip_big_endian() {
        check_round_trip(&[1000, 2000, 3000, 4000], true);
    }

    #[test]
    fn single_element() {
        // A one-element list has no differences to pack.
        let (packed, _) = check_round_trip(&[42], false);
        assert!(packed.is_empty());
    }

    #[test]
    fn identical_elements() {
        // All differences are zero.
        let (_, width) = check_round_trip(&[7, 7, 7, 7, 7], false);
        assert_eq!(width, 1);
    }

    #[test]
    fn large_values() {
        // Values sit at the top of the i64 range but the differences are 1.
        let (_, width) = check_round_trip(&[i64::MAX - 2, i64::MAX - 1, i64::MAX], false);
        assert_eq!(width, 1);
    }

    #[test]
    fn wide_spread() {
        // Differences of 100_000 and -200_000 need at least three bytes.
        let (_, width) = check_round_trip(&[0, 100_000, -100_000, 100_000], false);
        assert!(width >= 3);
    }
}