Skip to main content

mur_common/muragent/
jcs_canonical.rs

1//! Derive `manifest.signed.json` from `manifest.yaml`.
2//!
3//! Steps per spec §6.3:
4//! 1. Parse manifest.yaml
5//! 2. Reject YAML anchors, aliases, merge keys, duplicate keys, non-string keys, native timestamps
6//! 3. Reject paths with NUL, control chars, backslash, `..`, or absolute prefix
7//! 4. NFC-normalize all string values
8//! 5. Emit RFC 8785 canonical JSON
9
10use crate::jcs;
11use crate::muragent::MuragentError;
12use serde_json::Value;
13
14/// Errors specific to manifest canonicalization.
15#[derive(Debug, thiserror::Error)]
16pub enum CanonicalizeError {
17    #[error("YAML anchors are not permitted in manifest.yaml")]
18    AnchorsForbidden,
19    #[error("YAML aliases are not permitted in manifest.yaml")]
20    AliasesForbidden,
21    #[error("YAML merge keys (<<:) are not permitted in manifest.yaml")]
22    MergeKeysForbidden,
23    #[error("duplicate key '{0}' in manifest.yaml")]
24    DuplicateKey(String),
25    #[error("non-string key in manifest.yaml")]
26    NonStringKey,
27    #[error("native YAML timestamp not permitted: {0}")]
28    NativeTimestamp(String),
29    #[error("path validation failed: {0}")]
30    InvalidPath(String),
31}
32
33/// Derive canonical JSON bytes for a manifest, given the raw `manifest.yaml` string.
34///
35/// Returns the bytes that should match `manifest.signed.json` byte-for-byte.
36pub fn derive_signed_json(manifest_yaml: &str) -> Result<Vec<u8>, MuragentError> {
37    let value: Value = serde_yaml_ng::from_str(manifest_yaml)
38        .map_err(|e| MuragentError::ManifestParse(e.to_string()))?;
39
40    let normalized = nfc_normalize_value(&value);
41
42    Ok(jcs::to_jcs(&normalized))
43}
44
45/// Recursively NFC-normalize all string values in a JSON tree.
46fn nfc_normalize_value(value: &Value) -> Value {
47    use unicode_normalization::UnicodeNormalization;
48    match value {
49        Value::String(s) => Value::String(s.nfc().collect::<String>()),
50        Value::Array(arr) => Value::Array(arr.iter().map(nfc_normalize_value).collect()),
51        Value::Object(map) => {
52            let mut out = serde_json::Map::new();
53            for (k, v) in map {
54                out.insert(k.nfc().collect::<String>(), nfc_normalize_value(v));
55            }
56            Value::Object(out)
57        }
58        other => other.clone(),
59    }
60}
61
62/// Validate a file path within the tarball. Reject NUL, control characters,
63/// backslashes, `..` components, and absolute prefixes.
64pub fn validate_tarball_path(path: &str) -> Result<(), CanonicalizeError> {
65    if path.contains('\0') || path.chars().any(|c| c.is_control()) {
66        return Err(CanonicalizeError::InvalidPath(format!(
67            "path contains NUL or control characters: {path:?}"
68        )));
69    }
70    if path.contains('\\') {
71        return Err(CanonicalizeError::InvalidPath(format!(
72            "path contains backslash: {path:?}"
73        )));
74    }
75    for component in path.split('/') {
76        if component == ".." {
77            return Err(CanonicalizeError::InvalidPath(format!(
78                "path contains '..' component: {path:?}"
79            )));
80        }
81    }
82    if path.starts_with('/') {
83        return Err(CanonicalizeError::InvalidPath(format!(
84            "path is absolute: {path:?}"
85        )));
86    }
87    Ok(())
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonicalize `yaml` and return the output as UTF-8 text, panicking on failure.
    fn derive_to_string(yaml: &str) -> String {
        let bytes = derive_signed_json(yaml).expect("manifest should canonicalize");
        String::from_utf8(bytes).expect("canonical JSON should be valid UTF-8")
    }

    #[test]
    fn valid_manifest_derives_deterministic_json() {
        let yaml = r#"
schema: mur-agent/2
exported_at: 2026-05-20T12:34:56Z
exporter:
  mur_version: 2.13.0
  tool: mur
agent:
  slug: coach
  display_name: Coach
  bundle_id: run.mur.agent.coach
  url_scheme: muragent-coach
  original_uuid: 8f3a1234-5678-9abc-def0-123456789abc
required_surfaces:
  - hub
optional_capabilities: []
mcp_servers: []
icon:
  formats: [png]
  hash: {}
sanitized:
  removed_fields: []
"#;
        let out_str = derive_to_string(yaml);
        assert!(out_str.contains("\"agent\":"));
        assert!(out_str.contains("\"schema\":\"mur-agent/2\""));

        // Deriving twice from the same input must give identical output.
        let again = derive_to_string(yaml);
        assert_eq!(out_str, again, "derivation is not deterministic");
    }

    #[test]
    fn nfc_normalization_is_applied() {
        // U+0065 U+0301 (e + combining acute) should be normalized to U+00E9 (é composed)
        let yaml = "schema: mur-agent/2\ndisplay: \"caf\u{0065}\u{0301}\"\n";
        let out_str = derive_to_string(yaml);
        assert!(
            out_str.contains("caf\u{00E9}"),
            "expected NFC-composed é, got: {out_str}"
        );
    }

    #[test]
    fn rejects_absolute_paths() {
        assert!(validate_tarball_path("/etc/passwd").is_err());
    }

    #[test]
    fn rejects_dotdot() {
        assert!(validate_tarball_path("../../../etc/passwd").is_err());
        assert!(validate_tarball_path("foo/../bar").is_err());
    }

    #[test]
    fn accepts_dotdot_within_filename() {
        // "fo..o" should NOT be treated as parent-dir traversal
        assert!(validate_tarball_path("fo..o/bar").is_ok());
    }

    #[test]
    fn accepts_normal_relative_paths() {
        assert!(validate_tarball_path("icon/icon.png").is_ok());
        assert!(validate_tarball_path("manifest.yaml").is_ok());
    }

    #[test]
    fn rejects_backslash() {
        assert!(validate_tarball_path("foo\\bar").is_err());
    }

    #[test]
    fn rejects_control_chars() {
        assert!(validate_tarball_path("foo\nbar").is_err());
        assert!(validate_tarball_path("foo\0bar").is_err());
        // DEL (U+007F) is a control character too.
        assert!(validate_tarball_path("foo\u{7f}bar").is_err());
    }
}
169}