// grc-20 0.3.1
//
// GRC-20 v2 binary property graph format for decentralized knowledge networks.
// Documentation
//! UUID-based identifiers for GRC-20.
//!
//! All identifiers in GRC-20 are RFC 4122 UUIDs stored as 16 raw bytes.

use sha2::{Digest, Sha256};

/// A 16-byte UUID identifier.
///
/// This is the universal identifier type for entities, relations, properties,
/// types, spaces, authors, and all other objects in GRC-20.
///
/// It is a plain alias for a fixed-size byte array, so values are `Copy` and
/// compare byte-wise. Use [`format_id`] / [`format_id_hex`] to render an ID as
/// text and [`parse_id`] to read one back.
pub type Id = [u8; 16];

/// The zero/nil UUID: all sixteen bytes are `0x00`.
///
/// [`text_value_id`] hashes this value in place of the language ID when the
/// caller passes `None`.
pub const NIL_ID: Id = [0u8; 16];

/// Builds a deterministic UUIDv8 from arbitrary input bytes.
///
/// This is the `derived_uuid` function from spec Section 2.1:
/// ```text
/// hash = SHA-256(input_bytes)[0:16]
/// hash[6] = (hash[6] & 0x0F) | 0x80  // version 8
/// hash[8] = (hash[8] & 0x3F) | 0x80  // RFC 4122 variant
/// ```
///
/// The same `input` always yields the same ID.
pub fn derived_uuid(input: &[u8]) -> Id {
    let digest = Sha256::digest(input);

    // Keep only the leading 16 bytes of the 32-byte digest.
    let mut uuid: Id = [0u8; 16];
    for (dst, src) in uuid.iter_mut().zip(digest[..16].iter()) {
        *dst = *src;
    }

    // High nibble of byte 6 carries the version: force it to 8.
    uuid[6] = 0x80 | (uuid[6] & 0x0F);
    // Top two bits of byte 8 carry the variant: force them to 0b10 (RFC 4122).
    uuid[8] = 0x80 | (uuid[8] & 0x3F);

    uuid
}

/// Hashes a non-TEXT value into its value identity.
///
/// ```text
/// value_id = SHA-256(property_id || canonical_payload)[0:16]
/// ```
///
/// The result is the raw truncated digest; unlike [`derived_uuid`], no UUID
/// version or variant bits are set.
pub fn value_id(property_id: &Id, canonical_payload: &[u8]) -> Id {
    // Feed both parts into one streaming hasher, in the order the formula gives.
    let digest = {
        let mut sha = Sha256::new();
        for part in [&property_id[..], canonical_payload] {
            sha.update(part);
        }
        sha.finalize()
    };

    let mut truncated: Id = [0u8; 16];
    truncated.copy_from_slice(&digest[..16]);
    truncated
}

/// Hashes a TEXT value, together with its language, into its value identity.
///
/// ```text
/// value_id = SHA-256(property_id || canonical_payload || language_id)[0:16]
/// ```
///
/// When `language_id` is `None`, 16 zero bytes ([`NIL_ID`], the default
/// language) are hashed in its place.
pub fn text_value_id(property_id: &Id, text: &[u8], language_id: Option<&Id>) -> Id {
    let language: &Id = match language_id {
        Some(explicit) => explicit,
        None => &NIL_ID,
    };

    let mut sha = Sha256::new();
    for part in [&property_id[..], text, &language[..]] {
        sha.update(part);
    }
    let digest = sha.finalize();

    let mut truncated: Id = [0u8; 16];
    truncated.copy_from_slice(&digest[..16]);
    truncated
}

/// Computes the ID of a unique-mode relation from its endpoints and type.
///
/// ```text
/// id = derived_uuid(from_id || to_id || type_id)
/// ```
///
/// The three IDs are concatenated in exactly that order, so swapping
/// `from_id` and `to_id` changes the hashed input.
pub fn unique_relation_id(from_id: &Id, to_id: &Id, type_id: &Id) -> Id {
    // 3 x 16 bytes, laid out back to back.
    let mut preimage = [0u8; 48];
    let parts = [from_id, to_id, type_id];
    for (slot, part) in preimage.chunks_exact_mut(16).zip(parts.iter()) {
        slot.copy_from_slice(&part[..]);
    }
    derived_uuid(&preimage)
}

/// Domain-separation tag prepended to a relation ID before hashing it into
/// the ID of the relation's reified entity.
const RELATION_ENTITY_PREFIX: &[u8] = b"grc20:relation-entity:";

/// Computes the reified entity ID that belongs to a relation ID.
///
/// ```text
/// entity_id = derived_uuid("grc20:relation-entity:" || relation_id)
/// ```
///
/// This is used when no explicit entity ID is provided in CreateRelation,
/// ensuring deterministic entity IDs for both unique and instance mode relations.
pub fn relation_entity_id(relation_id: &Id) -> Id {
    // prefix || relation_id, built in a single allocation.
    let preimage = [RELATION_ENTITY_PREFIX, &relation_id[..]].concat();
    derived_uuid(&preimage)
}

/// The Bitcoin Base58 alphabet: digits and letters without `0`, `O`, `I`, `l`.
///
/// A symbol's position in this string is its digit value (0..=57).
const BASE58_ALPHABET: &[u8] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";

/// Inverse of [`BASE58_ALPHABET`], built at compile time.
///
/// Indexed by ASCII byte; holds the Base58 digit value, or `0xFF` for bytes
/// that are not part of the alphabet.
const BASE58_DECODE: [u8; 128] = {
    let mut lookup = [0xFFu8; 128];
    let mut digit = 0usize;
    // `for` loops are not available in const evaluation, hence the manual counter.
    while digit < BASE58_ALPHABET.len() {
        let symbol = BASE58_ALPHABET[digit];
        lookup[symbol as usize] = digit as u8;
        digit += 1;
    }
    lookup
};

/// Formats a UUID as Base58 (Bitcoin alphabet).
///
/// The 16 bytes are treated as one big-endian integer and written in base 58.
/// Each leading zero byte of `id` is emitted as a leading `'1'`, so the nil
/// ID formats as sixteen `'1'` characters. The output is at most 22
/// characters long and contains only ASCII characters from the alphabet.
pub fn format_id(id: &Id) -> String {
    // Leading zero bytes vanish in the integer conversion, so count them first.
    let leading_zeros = id.iter().take_while(|&&b| b == 0).count();

    // Repeatedly divide a scratch copy by 58; each remainder is the next
    // digit, least significant first.
    let mut number = *id;
    let mut digits: Vec<u8> = Vec::with_capacity(22);
    while number.iter().any(|&b| b != 0) {
        let mut remainder = 0u32;
        for byte in number.iter_mut() {
            // remainder < 58, so acc < 58 * 256 and the quotient fits in a u8.
            let acc = (remainder << 8) | u32::from(*byte);
            *byte = (acc / 58) as u8;
            remainder = acc % 58;
        }
        digits.push(BASE58_ALPHABET[remainder as usize]);
    }

    // One '1' per leading zero byte; they end up in front after the reversal.
    digits.resize(digits.len() + leading_zeros, b'1');

    // Every byte comes from the ASCII alphabet, so a byte-to-char mapping
    // builds the string without any `unsafe`.
    digits.iter().rev().map(|&b| char::from(b)).collect()
}

/// Parses a UUID from either a hex string or a Base58 string.
///
/// Hex is tried first: hyphens are removed, and if exactly 32 ASCII hex
/// digits remain the input is decoded as hex. Anything else is handed,
/// unmodified, to the Base58 decoder.
///
/// Returns `None` when the input is valid in neither encoding.
pub fn parse_id(s: &str) -> Option<Id> {
    // Dropping every '-' is the same as joining the pieces between hyphens.
    let hex_candidate: String = s.split('-').collect();
    let looks_like_hex =
        hex_candidate.len() == 32 && hex_candidate.bytes().all(|b| b.is_ascii_hexdigit());

    if looks_like_hex {
        parse_hex_id(&hex_candidate)
    } else {
        parse_base58_id(s)
    }
}

/// Parses a UUID from exactly 32 hex digits (no hyphens, either case).
///
/// Returns `None` if `hex` is not exactly 32 bytes long or contains any
/// character other than `0-9`, `a-f`, `A-F`.
fn parse_hex_id(hex: &str) -> Option<Id> {
    let digits = hex.as_bytes();
    if digits.len() != 32 {
        return None;
    }

    let mut id = [0u8; 16];
    for (byte, pair) in id.iter_mut().zip(digits.chunks_exact(2)) {
        // Decode each nibble on its own. `u8::from_str_radix` would also accept
        // a leading '+', letting a pair such as "+f" through as 0x0f.
        let hi = char::from(pair[0]).to_digit(16)?;
        let lo = char::from(pair[1]).to_digit(16)?;
        // Both nibbles are < 16, so the combined value fits in a u8.
        *byte = ((hi << 4) | lo) as u8;
    }
    Some(id)
}

/// Parses a UUID from a Base58 string (Bitcoin alphabet).
///
/// The digits are accumulated into a 16-byte big-endian integer. Returns
/// `None` when:
/// - the input is empty (an empty string is not an identifier and must not
///   silently decode to the nil ID),
/// - the input is longer than 22 characters (no such string can decode
///   successfully, so it is rejected before doing any work),
/// - a character is outside the Base58 alphabet,
/// - the value does not fit in 16 bytes, or
/// - there are more leading `'1'` characters than leading zero bytes in the
///   decoded value.
///
/// Fewer leading `'1'` characters than leading zero bytes is accepted, so an
/// unpadded encoding such as `"2"` decodes to the same ID as its padded form.
fn parse_base58_id(s: &str) -> Option<Id> {
    /// Longest Base58 string that can represent a 16-byte value.
    const MAX_BASE58_LEN: usize = 22;

    let symbols = s.as_bytes();
    if symbols.is_empty() || symbols.len() > MAX_BASE58_LEN {
        return None;
    }

    // Leading '1' characters stand for leading zero bytes.
    let leading_ones = symbols.iter().take_while(|&&c| c == b'1').count();

    // Big-number accumulate: result = result * 58 + digit, for each symbol.
    let mut result = [0u8; 16];
    for &symbol in symbols {
        // The table has 128 entries, so non-ASCII bytes fall out via `get`.
        let digit = match BASE58_DECODE.get(usize::from(symbol)) {
            Some(&value) if value != 0xFF => value,
            _ => return None,
        };

        let mut carry = u32::from(digit);
        for byte in result.iter_mut().rev() {
            let acc = u32::from(*byte) * 58 + carry;
            *byte = (acc & 0xFF) as u8;
            carry = acc >> 8;
        }
        if carry != 0 {
            return None; // Overflow: value too large for 16 bytes.
        }
    }

    // More leading '1's than leading zero bytes cannot be a 16-byte value.
    let leading_zeros = result.iter().take_while(|&&b| b == 0).count();
    if leading_ones > leading_zeros {
        return None;
    }

    Some(result)
}

/// Formats a UUID as non-hyphenated lowercase hex.
///
/// The result is always exactly 32 characters: two hex digits per byte, in
/// byte order.
pub fn format_id_hex(id: &Id) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Push both nibbles straight into the output; `format!` per byte would
    // allocate a temporary `String` sixteen times.
    let mut s = String::with_capacity(32);
    for &byte in id {
        s.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        s.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
    }
    s
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Returns an ID whose first `zeros` bytes are zero and whose remaining
    /// bytes are non-zero.
    fn id_with_leading_zeros(zeros: usize) -> Id {
        let mut id = [0xABu8; 16];
        for byte in id.iter_mut().take(zeros) {
            *byte = 0;
        }
        id
    }

    #[test]
    fn test_derived_uuid_version_and_variant() {
        let id = derived_uuid(b"test");
        // Version should be 8 (0x80 in high nibble of byte 6)
        assert_eq!(id[6] & 0xF0, 0x80);
        // Variant should be RFC 4122 (0b10 in high 2 bits of byte 8)
        assert_eq!(id[8] & 0xC0, 0x80);
    }

    #[test]
    fn test_derived_uuid_known_answer() {
        // SHA-256("test") begins 9f86d081884c7d65 9a2feaa0c55ad015; stamping
        // the version turns byte 6 from 0x7d into 0x8d, and byte 8 (0x9a)
        // already carries the RFC 4122 variant bits.
        assert_eq!(
            format_id_hex(&derived_uuid(b"test")),
            "9f86d081884c8d659a2feaa0c55ad015"
        );
    }

    #[test]
    fn test_derived_uuid_deterministic() {
        let id1 = derived_uuid(b"hello world");
        let id2 = derived_uuid(b"hello world");
        assert_eq!(id1, id2);

        let id3 = derived_uuid(b"different");
        assert_ne!(id1, id3);
    }

    #[test]
    fn test_format_parse_roundtrip() {
        let id = derived_uuid(b"test");
        let formatted = format_id(&id);
        let parsed = parse_id(&formatted).unwrap();
        assert_eq!(id, parsed);
    }

    #[test]
    fn test_base58_format() {
        let id = derived_uuid(b"test");
        let b58 = format_id(&id);
        assert!(b58.len() <= 22, "Base58 of 16 bytes should be <= 22 chars, got {}", b58.len());
        // Base58 output should be shorter than hex
        assert!(b58.len() < format_id_hex(&id).len());
        // Should only contain Base58 alphabet characters
        for c in b58.chars() {
            assert!(BASE58_ALPHABET.contains(&(c as u8)), "unexpected char: {}", c);
        }
    }

    #[test]
    fn test_parse_hex_backwards_compat() {
        let hex = "550e8400e29b41d4a716446655440000";
        let with_hyphens = "550e8400-e29b-41d4-a716-446655440000";
        let expected: Id = [
            0x55, 0x0e, 0x84, 0x00, 0xe2, 0x9b, 0x41, 0xd4, 0xa7, 0x16, 0x44, 0x66, 0x55, 0x44,
            0x00, 0x00,
        ];

        let id1 = parse_id(hex).unwrap();
        let id2 = parse_id(with_hyphens).unwrap();
        assert_eq!(id1, id2);
        // Pin the actual bytes, not just agreement between the two spellings.
        assert_eq!(id1, expected);
        assert_eq!(format_id_hex(&id1), hex);
    }

    #[test]
    fn test_base58_roundtrip_various() {
        // Test with several different IDs
        for input in [b"hello" as &[u8], b"world", b"test123", b"grc20"] {
            let id = derived_uuid(input);
            let formatted = format_id(&id);
            let parsed = parse_id(&formatted).unwrap();
            assert_eq!(id, parsed, "roundtrip failed for {:?}", input);
        }
    }

    #[test]
    fn test_base58_roundtrip_leading_zero_bytes() {
        // Leading zero bytes are encoded as leading '1' characters and must
        // survive a round trip for every possible count.
        for zeros in 0..=16 {
            let id = id_with_leading_zeros(zeros);
            let formatted = format_id(&id);
            assert!(
                formatted.starts_with(&"1".repeat(zeros)),
                "expected {} leading '1's in {}",
                zeros,
                formatted
            );
            assert_eq!(parse_id(&formatted), Some(id), "roundtrip failed for {} zeros", zeros);
        }
    }

    #[test]
    fn test_nil_id_roundtrip() {
        let formatted = format_id(&NIL_ID);
        assert_eq!(formatted, "1111111111111111");
        let parsed = parse_id(&formatted).unwrap();
        assert_eq!(NIL_ID, parsed);
    }

    #[test]
    fn test_parse_rejects_invalid_input() {
        // Characters excluded from the Base58 alphabet.
        for bad in ["0", "O", "I", "l", "not-a-valid-id!"] {
            assert_eq!(parse_id(bad), None, "accepted {:?}", bad);
        }
        // Value too large for 16 bytes.
        assert_eq!(parse_id(&"z".repeat(23)), None);
        // More leading '1's than a 16-byte value can have.
        assert_eq!(parse_id(&"1".repeat(17)), None);
    }

    #[test]
    fn test_unique_relation_id() {
        let from = [1u8; 16];
        let to = [2u8; 16];
        let type_id = [3u8; 16];

        let id1 = unique_relation_id(&from, &to, &type_id);
        let id2 = unique_relation_id(&from, &to, &type_id);
        assert_eq!(id1, id2);

        // Different inputs produce different IDs
        let id3 = unique_relation_id(&to, &from, &type_id);
        assert_ne!(id1, id3);
    }

    #[test]
    fn test_relation_entity_id() {
        let rel_id = [1u8; 16];

        // Deterministic
        let entity1 = relation_entity_id(&rel_id);
        let entity2 = relation_entity_id(&rel_id);
        assert_eq!(entity1, entity2);

        // Different relation IDs produce different entity IDs
        let rel_id2 = [2u8; 16];
        let entity3 = relation_entity_id(&rel_id2);
        assert_ne!(entity1, entity3);

        // Entity ID is different from relation ID
        assert_ne!(entity1, rel_id);

        // Verify it's a valid UUIDv8
        assert_eq!(entity1[6] & 0xF0, 0x80);
        assert_eq!(entity1[8] & 0xC0, 0x80);
    }
}