// cloudini 0.3.1 - Docs.rs

//! Zigzag varint encoding matching the Cloudini wire format.
//!
//! ## Encoding scheme
//!
//! 1. **Zigzag mapping** — maps signed integers to non-negative integers so that
//!    small magnitudes (positive and negative) produce small unsigned values:
//!    `0 → 0`, `-1 → 1`, `1 → 2`, `-2 → 3`, `2 → 4`, …
//! 2. **+1 shift** — the zigzag value is incremented by 1 before storage, reserving
//!    encoded byte `0x00` as a NaN sentinel for float fields.
//! 3. **7-bit little-endian chunks** — the shifted value is stored 7 bits at a time,
//!    LSB-first, with the high bit of each byte set to 1 if more bytes follow.

/// Write `value` into `buf` as a Cloudini zigzag varint and return its length.
///
/// The value is zigzag-folded into a `u64`, shifted up by one so that the raw
/// value `0` stays free for the NaN marker, and then emitted in 7-bit groups,
/// least-significant group first. Every byte except the last carries the
/// continuation flag `0x80`.
///
/// The return value is the number of bytes written, starting at `buf[0]`.
/// An encoding never needs more than 10 bytes.
///
/// # Panics
///
/// - If `buf` is shorter than the encoding (10 bytes is always enough).
/// - For `value == i64::MIN` in builds with overflow checks enabled: its
///   zigzag image is `u64::MAX`, so the `+1` shift overflows. Without overflow
///   checks the shift wraps to `0` and the single byte `0x00` is written.
///
/// # Examples
///
/// ```rust
/// use cloudini::varint::encode_varint64;
///
/// let mut buf = [0u8; 10];
/// let n = encode_varint64(0, &mut buf);
/// assert_eq!(&buf[..n], &[0x01]); // zigzag(0)+1 = 1 → single byte
///
/// let n = encode_varint64(-1, &mut buf);
/// assert_eq!(&buf[..n], &[0x02]); // zigzag(-1)+1 = 2 → single byte
///
/// let n = encode_varint64(1000, &mut buf);
/// assert_eq!(&buf[..n], &[0xd1, 0x0f]); // 2001 needs two 7-bit groups
/// ```
#[inline(always)]
pub fn encode_varint64(value: i64, buf: &mut [u8]) -> usize {
    // Fold the sign into bit 0 so that small magnitudes stay small.
    let zigzag = ((value << 1) ^ (value >> 63)) as u64;
    // Shift by one: raw 0 is reserved for the NaN marker.
    let mut remaining = zigzag + 1;

    let mut written = 0;
    loop {
        let group = (remaining & 0x7F) as u8;
        remaining >>= 7;
        if remaining == 0 {
            // Final group: continuation flag stays clear.
            buf[written] = group;
            return written + 1;
        }
        buf[written] = group | 0x80;
        written += 1;
    }
}

/// Decode a zigzag varint from the start of `buf`.
///
/// Reverses the encoding steps: the 7-bit little-endian groups are reassembled
/// into a `u64`, the `+1` shift is removed, and the zigzag mapping is undone.
///
/// Returns `(value, bytes_consumed)` on success. Any bytes in `buf` after the
/// terminating byte of the varint are left untouched and not counted.
///
/// # Errors
///
/// Every failure is reported as `Error::Truncated` with a message naming the
/// cause:
///
/// - `buf` is empty;
/// - `buf` ends before a byte with a clear continuation bit is found;
/// - the encoding does not fit in 64 bits, i.e. the tenth byte carries more
///   than bit 63 or the varint is longer than ten bytes;
/// - the raw value is `0`, which is the NaN sentinel and not a valid integer.
///
/// # Examples
///
/// ```rust
/// use cloudini::varint::{encode_varint64, decode_varint};
///
/// let mut buf = [0u8; 10];
/// let n = encode_varint64(-42, &mut buf);
/// let (value, consumed) = decode_varint(&buf[..n]).unwrap();
/// assert_eq!(value, -42);
/// assert_eq!(consumed, n);
///
/// // Over-long input is rejected instead of overflowing the accumulator.
/// assert!(decode_varint(&[0x80; 11]).is_err());
/// ```
#[inline(always)]
pub fn decode_varint(buf: &[u8]) -> crate::Result<(i64, usize)> {
    use crate::Error;

    let b0 = match buf.first() {
        Some(&byte) => byte,
        None => return Err(Error::Truncated("varint: empty input".into())),
    };

    // Fast path: single-byte varint (MSB clear). This is the common case for
    // small coordinate deltas typical in LiDAR point clouds.
    if b0 & 0x80 == 0 {
        if b0 == 0 {
            return Err(Error::Truncated(
                "varint: NaN sentinel in non-NaN context".into(),
            ));
        }
        return Ok((unshift_zigzag(u64::from(b0)), 1));
    }

    // Slow path: multi-byte varint. `shift` is the bit position at which the
    // next 7-bit group lands; it takes the values 7, 14, …, 63.
    let mut uval = u64::from(b0 & 0x7f);
    let mut shift: u32 = 7;
    let mut i = 1usize;

    loop {
        let byte = match buf.get(i) {
            Some(&byte) => byte,
            None => return Err(Error::Truncated("varint: truncated".into())),
        };
        i += 1;

        let payload = u64::from(byte & 0x7f);
        // A u64 spans at most ten groups. The tenth (shift == 63) may only
        // contribute bit 63, and an eleventh group (shift > 63) can never fit;
        // shifting by 70 would panic in debug builds and silently wrap the
        // shift amount in release builds.
        if shift > 63 || (shift == 63 && payload > 1) {
            return Err(Error::Truncated("varint: value overflow".into()));
        }

        uval |= payload << shift;
        shift += 7;

        if byte & 0x80 == 0 {
            break;
        }
    }

    // A multi-byte encoding of raw 0 is still the NaN sentinel.
    if uval == 0 {
        return Err(Error::Truncated(
            "varint: NaN sentinel in non-NaN context".into(),
        ));
    }

    Ok((unshift_zigzag(uval), i))
}

/// Remove the `+1` shift from a non-zero raw varint value and undo the zigzag
/// mapping: `(z >> 1) XOR -(z & 1)`.
#[inline(always)]
fn unshift_zigzag(shifted: u64) -> i64 {
    let zigzag = shifted - 1;
    ((zigzag >> 1) as i64) ^ -((zigzag & 1) as i64)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `value`, decode the bytes that were produced, and check that both
    /// the value and the byte count come back unchanged.
    fn roundtrip(value: i64) {
        let mut scratch = [0u8; 10];
        let written = encode_varint64(value, &mut scratch);
        let (decoded, consumed) = decode_varint(&scratch[..written]).unwrap();
        assert_eq!(decoded, value, "roundtrip failed for {}", value);
        assert_eq!(consumed, written);
    }

    /// Round-trips values around zero, around the one-byte/two-byte boundary,
    /// and at 32-bit and (halved) 64-bit extremes.
    #[test]
    fn test_varint_roundtrip() {
        const SAMPLES: [i64; 15] = [
            0,
            1,
            -1,
            2,
            -2,
            127,
            -127,
            128,
            -128,
            1000,
            -1000,
            i32::MAX as i64,
            i32::MIN as i64,
            i64::MAX / 2,
            i64::MIN / 2,
        ];
        SAMPLES.iter().copied().for_each(roundtrip);
    }

    /// Known-good byte sequences verified against the reference implementation.
    #[test]
    fn test_varint_known_values() {
        const CASES: &[(i64, &[u8])] = &[
            (0, &[0x01]),
            (1, &[0x03]),
            (-1, &[0x02]),
            (127, &[0xff, 0x01]),
            (-127, &[0xfe, 0x01]),
            (1000, &[0xd1, 0x0f]),
            (-1000, &[0xd0, 0x0f]),
            (100000, &[0xc1, 0x9a, 0x0c]),
            (-100000, &[0xc0, 0x9a, 0x0c]),
        ];
        for (value, expected) in CASES.iter().copied() {
            let mut scratch = [0u8; 10];
            let written = encode_varint64(value, &mut scratch);
            let actual = &scratch[..written];
            assert_eq!(
                actual,
                expected,
                "varint({value}) mismatch: got {:02x?}, expected {:02x?}",
                actual,
                expected
            );
        }
    }

    /// A lone `0x00` byte is the NaN marker and must not decode as an integer.
    #[test]
    fn test_varint_nan_marker() {
        assert!(decode_varint(&[0x00]).is_err());
    }
}