shadow_core/agentlog/
canonical.rs1use serde_json::Value;
13use unicode_normalization::UnicodeNormalization;
14
15pub fn to_bytes(value: &Value) -> Vec<u8> {
21 let mut out = Vec::new();
22 write_value(&mut out, value);
23 out
24}
25
26fn write_value(out: &mut Vec<u8>, value: &Value) {
27 match value {
28 Value::Null => out.extend_from_slice(b"null"),
29 Value::Bool(true) => out.extend_from_slice(b"true"),
30 Value::Bool(false) => out.extend_from_slice(b"false"),
31 Value::Number(n) => write_number(out, n),
32 Value::String(s) => write_string(out, s),
33 Value::Array(arr) => {
34 out.push(b'[');
35 for (i, v) in arr.iter().enumerate() {
36 if i > 0 {
37 out.push(b',');
38 }
39 write_value(out, v);
40 }
41 out.push(b']');
42 }
43 Value::Object(map) => {
44 out.push(b'{');
45 let mut entries: Vec<(String, &Value)> = map
48 .iter()
49 .map(|(k, v)| (k.nfc().collect::<String>(), v))
50 .collect();
51 entries.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
52 for (i, (k, v)) in entries.iter().enumerate() {
53 if i > 0 {
54 out.push(b',');
55 }
56 write_string(out, k);
57 out.push(b':');
58 write_value(out, v);
59 }
60 out.push(b'}');
61 }
62 }
63}
64
65fn write_string(out: &mut Vec<u8>, s: &str) {
66 out.push(b'"');
67 let normalized: String = s.nfc().collect();
68 for c in normalized.chars() {
69 match c {
70 '"' => out.extend_from_slice(b"\\\""),
71 '\\' => out.extend_from_slice(b"\\\\"),
72 '\n' => out.extend_from_slice(b"\\n"),
73 '\r' => out.extend_from_slice(b"\\r"),
74 '\t' => out.extend_from_slice(b"\\t"),
75 '\u{08}' => out.extend_from_slice(b"\\b"),
76 '\u{0c}' => out.extend_from_slice(b"\\f"),
77 c if (c as u32) < 0x20 => {
78 let code = c as u32;
80 let buf = [
81 b'\\',
82 b'u',
83 b'0',
84 b'0',
85 hex_nibble((code >> 4) as u8),
86 hex_nibble((code & 0xF) as u8),
87 ];
88 out.extend_from_slice(&buf);
89 }
90 c => {
91 let mut buf = [0u8; 4];
92 out.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
93 }
94 }
95 }
96 out.push(b'"');
97}
98
/// Returns the lowercase ASCII hex digit (`0`-`9`, `a`-`f`) for a 4-bit value.
///
/// Callers must pass `n < 16`; this is checked only in builds with debug assertions.
fn hex_nibble(n: u8) -> u8 {
    debug_assert!(n < 16);
    if n < 10 {
        b'0' + n
    } else {
        b'a' + (n - 10)
    }
}
106
107fn write_number(out: &mut Vec<u8>, n: &serde_json::Number) {
108 if let Some(i) = n.as_i64() {
111 out.extend_from_slice(i.to_string().as_bytes());
112 return;
113 }
114 if let Some(u) = n.as_u64() {
115 out.extend_from_slice(u.to_string().as_bytes());
116 return;
117 }
118 if let Some(f) = n.as_f64() {
119 if f == 0.0 {
121 out.push(b'0');
122 return;
123 }
124 if f.is_finite() {
130 let s = format!("{f}");
131 out.extend_from_slice(s.as_bytes());
132 return;
133 }
134 }
135 out.extend_from_slice(b"null");
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Canonicalizes `value` and returns the result as a `String`, so expectations can
    /// be written as ordinary string literals.
    fn canon(value: &Value) -> String {
        String::from_utf8(to_bytes(value)).expect("canonical output should be valid UTF-8")
    }

    /// Parses `text` as JSON, then canonicalizes the parsed value.
    fn canon_parsed(text: &str) -> String {
        let value: Value = serde_json::from_str(text).expect("test input should be valid JSON");
        canon(&value)
    }

    // Top-level keys come out in sorted order regardless of input order.
    #[test]
    fn sorts_object_keys() {
        let input = json!({"b": 2, "a": 1});
        assert_eq!(canon(&input), r#"{"a":1,"b":2}"#);
    }

    // Nested objects are sorted as well.
    #[test]
    fn recursive_sorting() {
        let input = json!({"b": {"z": 1, "y": 2}, "a": 1});
        assert_eq!(canon(&input), r#"{"a":1,"b":{"y":2,"z":1}}"#);
    }

    // Array elements are separated by a bare comma.
    #[test]
    fn no_whitespace_in_arrays() {
        let input = json!({"a": [1, 2, 3]});
        assert_eq!(canon(&input), r#"{"a":[1,2,3]}"#);
    }

    #[test]
    fn booleans_and_null() {
        let cases = [
            (json!(true), "true"),
            (json!(false), "false"),
            (json!(null), "null"),
        ];
        for (input, expected) in &cases {
            assert_eq!(canon(input), *expected);
        }
    }

    #[test]
    fn integer_numbers() {
        let cases = [(json!(42), "42"), (json!(-17), "-17"), (json!(0), "0")];
        for (input, expected) in &cases {
            assert_eq!(canon(input), *expected);
        }
    }

    // A float with no fractional part is written without a decimal point.
    #[test]
    fn float_that_is_an_integer_emits_as_integer() {
        assert_eq!(canon_parsed("1.00"), "1");
    }

    #[test]
    fn fractional_float() {
        let cases = [(json!(1.5), "1.5"), (json!(0.1), "0.1")];
        for (input, expected) in &cases {
            assert_eq!(canon(input), *expected);
        }
    }

    #[test]
    fn negative_zero_normalizes_to_zero() {
        assert_eq!(canon_parsed("-0.0"), "0");
    }

    // Quote and backslash must always be escaped.
    #[test]
    fn string_mandatory_escapes() {
        let input = json!({"x": "a\"b\\c"});
        assert_eq!(canon(&input), r#"{"x":"a\"b\\c"}"#);
    }

    #[test]
    fn string_control_chars_use_shorthand_when_available() {
        let cases = [
            (json!("\n"), r#""\n""#),
            (json!("\t"), r#""\t""#),
            (json!("\r"), r#""\r""#),
        ];
        for (input, expected) in &cases {
            assert_eq!(canon(input), *expected);
        }
    }

    // Control characters without a shorthand use the six-character \u00xx form with
    // lowercase hex digits.
    #[test]
    fn string_other_control_chars_use_u00xx() {
        assert_eq!(canon(&json!("\u{01}")), "\"\\u0001\"");
        assert_eq!(canon(&json!("\u{1f}")), "\"\\u001f\"");
    }

    // Non-ASCII text is written as raw UTF-8 bytes, not as a \u escape.
    #[test]
    fn non_ascii_emitted_literally() {
        let encoded = to_bytes(&json!("é"));
        assert_eq!(encoded, [b'"', 0xc3, 0xa9, b'"']);
    }

    // "e" + combining acute and precomposed "é" canonicalize to the same bytes.
    #[test]
    fn utf8_nfc_collapses_equivalent_forms() {
        let from_decomposed = to_bytes(&json!("e\u{0301}"));
        let from_precomposed = to_bytes(&json!("\u{00e9}"));
        assert_eq!(from_decomposed, from_precomposed);
    }

    // Both keys normalize to the same NFC form; this only pins that the first key
    // emitted is the precomposed form.
    #[test]
    fn utf8_nfc_applied_to_object_keys() {
        let encoded = canon(&json!({ "é": 1, "e\u{0301}": 2 }));
        assert!(encoded.starts_with(r#"{"é":"#));
    }

    // Canonicalizing, re-parsing and canonicalizing again yields identical bytes.
    #[test]
    fn idempotent_roundtrip() {
        let original = json!({"b": 2, "a": {"d": 3, "c": 4}, "arr": [{"y": 1, "x": 2}]});
        let first_pass = to_bytes(&original);
        let reparsed: Value = serde_json::from_slice(&first_pass).unwrap();
        let second_pass = to_bytes(&reparsed);
        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn spec_5_6_known_vector_canonical_bytes() {
        let payload = json!({"hello": "world"});
        assert_eq!(canon(&payload), r#"{"hello":"world"}"#);
    }
}