// agent-rooms 0.1.0

//! Parley simplified canonical JSON — SPEC §4.
//!
//! This is **not** RFC 8785 JCS. It matches Python's
//! `json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False)`
//! byte-for-byte:
//!
//! 1. Object keys sorted lexicographically by UTF-16 code units (identical to
//!    Python's code-point ordering whenever every key stays within the BMP).
//! 2. No whitespace; `,` / `:` separators.
//! 3. UTF-8 output, non-ASCII as literal UTF-8 (not `\u`-escaped).
//! 4. Standard JSON string escapes: `\"`, `\\`, `\b`, `\f`, `\n`, `\r`, `\t`,
//!    and `\u00xx` for any other control char < 0x20.
//! 5. Forward slash is not escaped.
//! 6. Numbers: integers only (SPEC forbids floats in signed payloads).

use serde_json::Value;
use sha2::{Digest, Sha256};

use crate::error::Error;

/// Encode `value` as canonical JSON bytes per SPEC §4.
///
/// The whole tree is serialized by `write_value` into one freshly allocated
/// buffer, which is returned as UTF-8 bytes. Encoding cannot fail: every
/// `Value` variant has an output form.
pub fn canonical_json(value: &Value) -> Vec<u8> {
    // 64 bytes is only a starting capacity; the buffer grows as needed.
    let mut encoded: Vec<u8> = Vec::with_capacity(64);
    write_value(&mut encoded, value);
    encoded
}

/// SHA-256 of the canonical bytes of `value`, as lowercase hex (no prefix).
///
/// The value is first encoded with [`canonical_json`], so two values that
/// differ only in object key order hash identically.
pub fn sha256_hex(value: &Value) -> String {
    // Table lookup instead of `format!("{:02x}")`, which would allocate a
    // temporary `String` for every digest byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let canonical = canonical_json(value);
    let digest = Sha256::digest(&canonical);

    // Two hex characters per digest byte: 32 bytes -> 64 characters.
    let mut hex = String::with_capacity(64);
    for &byte in digest.iter() {
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}

/// Parse a JSON string into a `Value` for canonical encoding.
pub fn parse(json: &str) -> Result<Value, Error> {
    serde_json::from_str(json).map_err(|e| Error::InvalidJson(e.to_string()))
}

/// Append the canonical encoding of `value` to `out`.
///
/// Scalars are written directly; arrays and objects recurse into their
/// elements. No whitespace is emitted, and object members are written in
/// the order defined by `utf16_cmp` on their keys.
fn write_value(out: &mut Vec<u8>, value: &Value) {
    match value {
        Value::Null => out.extend_from_slice(b"null"),
        Value::Bool(true) => out.extend_from_slice(b"true"),
        Value::Bool(false) => out.extend_from_slice(b"false"),
        // Written using `Number`'s `Display` output. Nothing here rejects
        // non-integers; the "integers only" rule must be enforced by callers.
        Value::Number(n) => out.extend_from_slice(n.to_string().as_bytes()),
        Value::String(s) => write_string(out, s),
        Value::Array(items) => {
            out.push(b'[');
            for (i, item) in items.iter().enumerate() {
                if i > 0 {
                    out.push(b',');
                }
                write_value(out, item);
            }
            out.push(b']');
        }
        Value::Object(map) => {
            // Keys are ordered by UTF-16 code units (see `utf16_cmp`). For
            // keys made only of BMP characters this is the same as code-point
            // and UTF-8 byte order; protocol keys are expected to be ASCII.
            //
            // Sorting (key, value) pairs avoids a second map lookup per key.
            // Keys in a map are unique, so an unstable sort is deterministic.
            let mut entries: Vec<(&String, &Value)> = map.iter().collect();
            entries.sort_unstable_by(|(a, _), (b, _)| utf16_cmp(a, b));

            out.push(b'{');
            for (i, (key, member)) in entries.into_iter().enumerate() {
                if i > 0 {
                    out.push(b',');
                }
                write_string(out, key);
                out.push(b':');
                write_value(out, member);
            }
            out.push(b'}');
        }
    }
}

/// Compare two strings lexicographically by their UTF-16 code units.
///
/// A string that is a strict prefix of another sorts first. For all-ASCII
/// (and, more generally, all-BMP) strings the result equals code-point and
/// UTF-8 byte order. It differs from those only when a character above
/// U+FFFF is compared against one in U+E000..=U+FFFF: the former encodes as
/// surrogates (0xD800..=0xDFFF), which sort *below* the latter.
///
/// NOTE(review): CPython 3 compares `str` by code point, so that one edge
/// case may not match `json.dumps(sort_keys=True)` — confirm against SPEC §4.
fn utf16_cmp(a: &str, b: &str) -> std::cmp::Ordering {
    // `Iterator::cmp` is exactly the element-wise lexicographic comparison.
    a.encode_utf16().cmp(b.encode_utf16())
}

/// Append `s` to `out` as a double-quoted JSON string literal.
///
/// `"` and `\` are backslash-escaped; backspace, tab, newline, form feed and
/// carriage return use their short escapes; any other byte below 0x20 becomes
/// `\u00xx` with lowercase hex. Everything else, including `/` and all
/// non-ASCII text, is copied through unchanged as UTF-8.
fn write_string(out: &mut Vec<u8>, s: &str) {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let raw = s.as_bytes();
    // Start of the pending run of bytes that need no escaping.
    let mut run_start = 0;

    out.push(b'"');
    for (idx, &byte) in raw.iter().enumerate() {
        let unicode_escape: [u8; 6];
        // Every escapable character is ASCII, and bytes of multi-byte UTF-8
        // sequences are always >= 0x80, so scanning bytes is safe here.
        let replacement: &[u8] = match byte {
            b'"' => b"\\\"",
            b'\\' => b"\\\\",
            0x08 => b"\\b",
            0x09 => b"\\t",
            0x0A => b"\\n",
            0x0C => b"\\f",
            0x0D => b"\\r",
            0x00..=0x1F => {
                unicode_escape = [
                    b'\\',
                    b'u',
                    b'0',
                    b'0',
                    HEX_DIGITS[usize::from(byte >> 4)],
                    HEX_DIGITS[usize::from(byte & 0x0F)],
                ];
                &unicode_escape
            }
            // ensure_ascii=False: leave the byte in the literal run.
            _ => continue,
        };
        out.extend_from_slice(&raw[run_start..idx]);
        out.extend_from_slice(replacement);
        run_start = idx + 1;
    }
    out.extend_from_slice(&raw[run_start..]);
    out.push(b'"');
}

// Unit tests for the canonical encoder, the parser wrapper and the digest
// helper. Expected byte strings follow the rules listed in the module docs.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn empty_object() {
        assert_eq!(canonical_json(&json!({})), b"{}");
    }

    #[test]
    fn sorts_keys() {
        assert_eq!(
            canonical_json(&json!({"b": 2, "a": 1})),
            br#"{"a":1,"b":2}"#
        );
    }

    #[test]
    fn unicode_literal() {
        let v = json!({"topic": "café"});
        assert_eq!(canonical_json(&v), "{\"topic\":\"café\"}".as_bytes());
    }

    #[test]
    fn string_escapes() {
        let v = json!({"s": "line1\nline2\t\"quoted\""});
        assert_eq!(canonical_json(&v), br#"{"s":"line1\nline2\t\"quoted\""}"#);
    }

    #[test]
    fn control_chars_use_lowercase_u_escapes() {
        let v = json!({"s": "\u{0001}\u{001f}"});
        assert_eq!(canonical_json(&v), br#"{"s":"\u0001\u001f"}"#);
    }

    #[test]
    fn forward_slash_is_not_escaped() {
        assert_eq!(canonical_json(&json!("a/b")), br#""a/b""#);
    }

    #[test]
    fn nested_containers() {
        let v = json!({"z": [1, [true, null], {"b": "x", "a": -2}], "a": false});
        assert_eq!(
            canonical_json(&v),
            br#"{"a":false,"z":[1,[true,null],{"a":-2,"b":"x"}]}"#
        );
    }

    #[test]
    fn keys_sort_by_utf16_code_units() {
        // U+10000 encodes as the surrogate pair D800 DC00, which sorts before
        // the single code unit FF21 even though its code point is larger.
        let v = json!({"\u{ff21}": 2, "\u{10000}": 1});
        assert_eq!(
            canonical_json(&v),
            "{\"\u{10000}\":1,\"\u{ff21}\":2}".as_bytes()
        );
    }

    #[test]
    fn parse_then_encode_normalizes_input() {
        match parse("{ \"b\" : [1, 2], \"a\" : \"x\" }") {
            Ok(v) => assert_eq!(canonical_json(&v), br#"{"a":"x","b":[1,2]}"#),
            Err(_) => panic!("valid JSON must parse"),
        }
    }

    #[test]
    fn parse_rejects_malformed_input() {
        assert!(parse("{\"a\":").is_err());
    }

    #[test]
    fn sha256_hex_is_lowercase_hex_and_key_order_independent() {
        let first = sha256_hex(&json!({"a": 1, "b": 2}));
        let second = sha256_hex(&json!({"b": 2, "a": 1}));
        assert_eq!(first, second);
        assert_eq!(first.len(), 64);
        assert!(first
            .bytes()
            .all(|c| matches!(c, b'0'..=b'9' | b'a'..=b'f')));
    }
}