haz-cache 0.2.0

Content-addressed cache for haz task outputs using BLAKE3.
Documentation
//! Lowercase hexadecimal encoding for 32-byte digests.
//!
//! Used internally by the manifest format (`CACHE-011`) and the
//! sharded entry layout (`CACHE-010`). Encoded output is always 64
//! lowercase ASCII characters in `[0-9a-f]`; decoding additionally
//! accepts uppercase `A-F`.

use snafu::{Snafu, ensure};

/// Failure modes for [`decode_32`].
///
/// Both variants describe the rejected input in terms of its bytes (as
/// seen through `str::as_bytes`), not Unicode scalar values.
#[derive(Debug, Clone, PartialEq, Eq, Snafu)]
pub enum HexError {
    /// Input was not exactly 64 bytes long.
    ///
    /// [`decode_32`] checks the length before inspecting any individual
    /// byte, so this takes precedence over [`HexError::InvalidByte`].
    #[snafu(display("hex digest must be 64 characters; got {length}"))]
    InvalidLength {
        /// Byte length of the rejected input.
        length: usize,
    },

    /// Input contained a byte outside `[0-9A-Fa-f]`.
    ///
    /// Decoding stops at the first offending byte, so only that one is
    /// reported.
    #[snafu(display("hex digest contains invalid character {byte:#04x} at offset {offset}"))]
    InvalidByte {
        /// 0-based byte offset of the offending byte within the input.
        offset: usize,
        /// The offending byte.
        byte: u8,
    },
}

/// Render a 32-byte digest as a 64-character lowercase hexadecimal string.
///
/// Every input byte contributes two characters: the high nibble first,
/// followed by the low nibble.
#[must_use]
pub fn encode_32(bytes: &[u8; 32]) -> String {
    // Two hex digits per byte, so the final length is known up front.
    bytes
        .iter()
        .fold(String::with_capacity(64), |mut hex, &byte| {
            hex.push(nibble(byte >> 4));
            hex.push(nibble(byte & 0x0F));
            hex
        })
}

/// Parse a 64-character hexadecimal string into a 32-byte digest.
///
/// Both lowercase and uppercase hex digits are accepted. Each pair of
/// input bytes yields one output byte, high nibble first.
///
/// # Errors
///
/// Returns [`HexError::InvalidLength`] when the input is not exactly 64
/// bytes long, and [`HexError::InvalidByte`] for the first byte that is
/// outside `[0-9A-Fa-f]`.
pub fn decode_32(s: &str) -> Result<[u8; 32], HexError> {
    let input = s.as_bytes();
    let length = input.len();
    ensure!(length == 64, InvalidLengthSnafu { length });

    let mut digest = [0u8; 32];
    // The length check above guarantees exactly 32 pairs, one per slot.
    for (idx, (slot, pair)) in digest.iter_mut().zip(input.chunks_exact(2)).enumerate() {
        let offset = idx * 2;
        let high = decode_nibble(pair[0], offset)?;
        let low = decode_nibble(pair[1], offset + 1)?;
        *slot = (high << 4) | low;
    }
    Ok(digest)
}

/// Map a 4-bit value to its lowercase hexadecimal digit.
///
/// `0..=9` map to `'0'..='9'` and `10..=15` map to `'a'..='f'`. Any value
/// above 15 also yields `'f'`; the caller visible in this file
/// (`encode_32`) only passes shifted or masked nibbles, so that case is
/// not expected to occur there.
const fn nibble(n: u8) -> char {
    match n {
        0..=9 => (b'0' + n) as char,
        10..=14 => (b'a' + (n - 10)) as char,
        // 15 and every out-of-range value render as 'f'.
        _ => 'f',
    }
}

/// Convert one ASCII hex digit into its 4-bit value.
///
/// Accepts `0-9`, `a-f`, and `A-F`. Any other byte is rejected with
/// [`HexError::InvalidByte`], carrying the supplied `offset` and the
/// byte itself so the caller can report where decoding failed.
fn decode_nibble(byte: u8, offset: usize) -> Result<u8, HexError> {
    if byte.is_ascii_digit() {
        return Ok(byte - b'0');
    }
    // Fold `A-F` onto `a-f`; every other byte is left outside that range.
    let folded = byte.to_ascii_lowercase();
    if (b'a'..=b'f').contains(&folded) {
        return Ok(folded - b'a' + 10);
    }
    Err(HexError::InvalidByte { offset, byte })
}

#[cfg(test)]
mod tests {
    use crate::hex::{HexError, decode_32, encode_32};

    /// Builds 64 `'0'` characters with the byte at `offset` replaced by
    /// the ASCII byte `byte`, keeping the total length at 64 bytes.
    fn zeros_with(offset: usize, byte: u8) -> String {
        assert!(byte.is_ascii(), "helper only supports single-byte characters");
        let mut s = "0".repeat(64);
        s.replace_range(offset..=offset, &char::from(byte).to_string());
        s
    }

    #[test]
    fn encodes_all_zeros() {
        let bytes = [0u8; 32];
        let hex = encode_32(&bytes);
        assert_eq!(hex.len(), 64);
        assert!(hex.chars().all(|c| c == '0'));
    }

    #[test]
    fn encodes_all_ff() {
        let bytes = [0xFFu8; 32];
        let hex = encode_32(&bytes);
        assert_eq!(hex.len(), 64);
        assert!(hex.chars().all(|c| c == 'f'));
    }

    #[test]
    fn round_trip_random_pattern() {
        let mut bytes = [0u8; 32];
        for (i, b) in bytes.iter_mut().enumerate() {
            // Arbitrary, deterministic pattern: each byte distinct.
            *b = u8::try_from((i * 7) & 0xFF).unwrap();
        }
        let hex = encode_32(&bytes);
        let back = decode_32(&hex).unwrap();
        assert_eq!(back, bytes);
    }

    #[test]
    fn output_is_lowercase_only() {
        let bytes = [0xAB; 32];
        let hex = encode_32(&bytes);
        assert!(hex.chars().all(|c| !c.is_ascii_uppercase()));
        assert_eq!(&hex[..2], "ab");
    }

    #[test]
    fn decode_accepts_uppercase() {
        let lower = "ab".repeat(32);
        let upper = "AB".repeat(32);
        let mixed = "aB".repeat(32);
        // Pin the actual value too, so two equally wrong decodes cannot
        // make this test pass.
        assert_eq!(decode_32(&lower).unwrap(), [0xAB; 32]);
        assert_eq!(decode_32(&upper).unwrap(), [0xAB; 32]);
        assert_eq!(decode_32(&mixed).unwrap(), [0xAB; 32]);
    }

    #[test]
    fn decode_maps_every_digit() {
        let hex = "0123456789abcdef".repeat(4);
        let mut expected = [0u8; 32];
        let pattern = [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF];
        for (slot, value) in expected.iter_mut().zip(pattern.iter().cycle()) {
            *slot = *value;
        }
        assert_eq!(decode_32(&hex).unwrap(), expected);
        assert_eq!(decode_32(&hex.to_ascii_uppercase()).unwrap(), expected);
    }

    #[test]
    fn decode_rejects_empty_input() {
        let err = decode_32("").unwrap_err();
        assert_eq!(err, HexError::InvalidLength { length: 0 });
    }

    #[test]
    fn decode_rejects_short_input() {
        let err = decode_32("ab").unwrap_err();
        assert_eq!(err, HexError::InvalidLength { length: 2 });
    }

    #[test]
    fn decode_rejects_long_input() {
        let s = "a".repeat(100);
        let err = decode_32(&s).unwrap_err();
        assert_eq!(err, HexError::InvalidLength { length: 100 });
    }

    #[test]
    fn decode_rejects_off_by_one_lengths() {
        for length in [63usize, 65] {
            let s = "a".repeat(length);
            let err = decode_32(&s).unwrap_err();
            assert_eq!(err, HexError::InvalidLength { length });
        }
    }

    #[test]
    fn decode_rejects_non_hex_character() {
        // 63 valid + 1 invalid (in the middle, at a high-nibble position).
        let mut s = "a".repeat(64);
        s.replace_range(30..31, "z");
        let err = decode_32(&s).unwrap_err();
        assert_eq!(
            err,
            HexError::InvalidByte {
                offset: 30,
                byte: b'z'
            }
        );
    }

    #[test]
    fn decode_reports_offset_of_invalid_low_nibble() {
        // Odd offsets exercise the second byte of a pair.
        let err = decode_32(&zeros_with(31, b'z')).unwrap_err();
        assert_eq!(
            err,
            HexError::InvalidByte {
                offset: 31,
                byte: b'z'
            }
        );
    }

    #[test]
    fn decode_reports_first_invalid_byte() {
        let mut s = zeros_with(5, b'x');
        s.replace_range(40..41, "y");
        let err = decode_32(&s).unwrap_err();
        assert_eq!(
            err,
            HexError::InvalidByte {
                offset: 5,
                byte: b'x'
            }
        );
    }

    #[test]
    fn decode_rejects_bytes_adjacent_to_hex_ranges() {
        // Bytes immediately before/after '0'..'9', 'A'..'F', and 'a'..'f'.
        for byte in [b'/', b':', b'@', b'G', b'`', b'g'] {
            let err = decode_32(&zeros_with(0, byte)).unwrap_err();
            assert_eq!(err, HexError::InvalidByte { offset: 0, byte });
        }
    }

    #[test]
    fn decode_rejects_non_ascii_input_with_64_bytes() {
        // U+00E9 is two bytes in UTF-8 (0xC3 0xA9), so the byte length
        // check passes and the first byte is reported as invalid.
        let s = format!("\u{e9}{}", "a".repeat(62));
        assert_eq!(s.len(), 64);
        let err = decode_32(&s).unwrap_err();
        assert_eq!(
            err,
            HexError::InvalidByte {
                offset: 0,
                byte: 0xC3
            }
        );
    }

    #[test]
    fn encode_matches_known_vector() {
        // First 4 bytes of BLAKE3("") = 0xaf, 0x13, 0x49, 0xb9.
        let mut bytes = [0u8; 32];
        bytes[0] = 0xaf;
        bytes[1] = 0x13;
        bytes[2] = 0x49;
        bytes[3] = 0xb9;
        let hex = encode_32(&bytes);
        assert_eq!(&hex[..8], "af1349b9");
    }
}