1use serde_json::Value;
6use unicode_normalization::UnicodeNormalization;
7
8use crate::errors::{AshError, AshErrorCode};
9
10pub fn canonicalize_json(input: &str) -> Result<String, AshError> {
40 let value: Value = serde_json::from_str(input).map_err(|e| {
42 AshError::new(
43 AshErrorCode::CanonicalizationFailed,
44 format!("Invalid JSON: {}", e),
45 )
46 })?;
47
48 let canonical = canonicalize_value(&value)?;
50
51 serde_json::to_string(&canonical).map_err(|e| {
53 AshError::new(
54 AshErrorCode::CanonicalizationFailed,
55 format!("Failed to serialize: {}", e),
56 )
57 })
58}
59
60fn canonicalize_value(value: &Value) -> Result<Value, AshError> {
62 match value {
63 Value::Null => Ok(Value::Null),
64 Value::Bool(b) => Ok(Value::Bool(*b)),
65 Value::Number(n) => canonicalize_number(n),
66 Value::String(s) => Ok(Value::String(canonicalize_string(s))),
67 Value::Array(arr) => {
68 let canonical: Result<Vec<Value>, AshError> =
69 arr.iter().map(canonicalize_value).collect();
70 Ok(Value::Array(canonical?))
71 }
72 Value::Object(obj) => {
73 let mut sorted: Vec<(&String, &Value)> = obj.iter().collect();
75 sorted.sort_by(|a, b| a.0.cmp(b.0));
76
77 let mut canonical = serde_json::Map::new();
78 for (key, val) in sorted {
79 let canonical_key = canonicalize_string(key);
80 let canonical_val = canonicalize_value(val)?;
81 canonical.insert(canonical_key, canonical_val);
82 }
83 Ok(Value::Object(canonical))
84 }
85 }
86}
87
88fn canonicalize_number(n: &serde_json::Number) -> Result<Value, AshError> {
90 if let Some(i) = n.as_i64() {
94 if i == 0 {
96 return Ok(Value::Number(serde_json::Number::from(0)));
97 }
98 return Ok(Value::Number(serde_json::Number::from(i)));
99 }
100
101 if let Some(u) = n.as_u64() {
102 return Ok(Value::Number(serde_json::Number::from(u)));
103 }
104
105 if let Some(f) = n.as_f64() {
106 if f.is_nan() {
108 return Err(AshError::new(
109 AshErrorCode::CanonicalizationFailed,
110 "NaN is not supported in ASH canonicalization",
111 ));
112 }
113 if f.is_infinite() {
114 return Err(AshError::new(
115 AshErrorCode::CanonicalizationFailed,
116 "Infinity is not supported in ASH canonicalization",
117 ));
118 }
119
120 let f = if f == 0.0 && f.is_sign_negative() {
122 0.0
123 } else {
124 f
125 };
126
127 serde_json::Number::from_f64(f)
129 .map(Value::Number)
130 .ok_or_else(|| {
131 AshError::new(
132 AshErrorCode::CanonicalizationFailed,
133 "Failed to canonicalize number",
134 )
135 })
136 } else {
137 Err(AshError::new(
138 AshErrorCode::CanonicalizationFailed,
139 "Unsupported number format",
140 ))
141 }
142}
143
144fn canonicalize_string(s: &str) -> String {
146 s.nfc().collect()
147}
148
149pub fn canonicalize_urlencoded(input: &str) -> Result<String, AshError> {
170 if input.is_empty() {
171 return Ok(String::new());
172 }
173
174 let mut pairs: Vec<(String, String)> = Vec::new();
176
177 for part in input.split('&') {
178 if part.is_empty() {
179 continue;
180 }
181
182 let (key, value) = match part.find('=') {
183 Some(pos) => (&part[..pos], &part[pos + 1..]),
184 None => (part, ""),
185 };
186
187 let decoded_key = percent_decode(key)?;
189 let decoded_value = percent_decode(value)?;
190
191 let normalized_key: String = decoded_key.nfc().collect();
193 let normalized_value: String = decoded_value.nfc().collect();
194
195 pairs.push((normalized_key, normalized_value));
196 }
197
198 pairs.sort_by(|a, b| a.0.cmp(&b.0));
200
201 let encoded: Vec<String> = pairs
203 .into_iter()
204 .map(|(k, v)| format!("{}={}", percent_encode(&k), percent_encode(&v)))
205 .collect();
206
207 Ok(encoded.join("&"))
208}
209
210fn percent_decode(input: &str) -> Result<String, AshError> {
212 let mut result = String::with_capacity(input.len());
213 let mut chars = input.chars().peekable();
214
215 while let Some(ch) = chars.next() {
216 if ch == '%' {
217 let hex: String = chars.by_ref().take(2).collect();
219 if hex.len() != 2 {
220 return Err(AshError::new(
221 AshErrorCode::CanonicalizationFailed,
222 "Invalid percent encoding",
223 ));
224 }
225 let byte = u8::from_str_radix(&hex, 16).map_err(|_| {
226 AshError::new(
227 AshErrorCode::CanonicalizationFailed,
228 "Invalid percent encoding hex",
229 )
230 })?;
231 result.push(byte as char);
232 } else if ch == '+' {
233 result.push(' ');
235 } else {
236 result.push(ch);
237 }
238 }
239
240 Ok(result)
241}
242
243pub fn canonicalize_query(input: &str) -> Result<String, AshError> {
272 let query = input.strip_prefix('?').unwrap_or(input);
274
275 if query.is_empty() {
276 return Ok(String::new());
277 }
278
279 let mut pairs: Vec<(String, String)> = Vec::new();
281
282 for part in query.split('&') {
283 if part.is_empty() {
284 continue;
285 }
286
287 let (key, value) = match part.find('=') {
288 Some(pos) => (&part[..pos], &part[pos + 1..]),
289 None => (part, ""), };
291
292 let decoded_key = percent_decode(key)?;
294 let decoded_value = percent_decode(value)?;
295
296 let normalized_key: String = decoded_key.nfc().collect();
298 let normalized_value: String = decoded_value.nfc().collect();
299
300 pairs.push((normalized_key, normalized_value));
301 }
302
303 pairs.sort_by(|a, b| a.0.cmp(&b.0));
305
306 let encoded: Vec<String> = pairs
308 .into_iter()
309 .map(|(k, v)| {
310 format!(
311 "{}={}",
312 percent_encode_uppercase(&k),
313 percent_encode_uppercase(&v)
314 )
315 })
316 .collect();
317
318 Ok(encoded.join("&"))
319}
320
/// Percent-encodes `input` with uppercase hex escapes. RFC 3986 unreserved
/// characters (ALPHA / DIGIT / '-' / '_' / '.' / '~') pass through; every
/// other character is emitted as the %XX escapes of its UTF-8 bytes
/// (a space therefore becomes "%20").
fn percent_encode_uppercase(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len() * 3);
    let mut utf8 = [0u8; 4];

    for ch in input.chars() {
        let unreserved =
            ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.' | '~');
        if unreserved {
            encoded.push(ch);
        } else {
            // Escape each UTF-8 byte of the character individually.
            for &byte in ch.encode_utf8(&mut utf8).as_bytes() {
                encoded.push_str(&format!("%{:02X}", byte));
            }
        }
    }

    encoded
}
344
/// Percent-encodes `input` with uppercase hex escapes (same output as
/// `percent_encode_uppercase`; kept as a separate entry point). RFC 3986
/// unreserved characters pass through; all others — including the space —
/// become the %XX escapes of their UTF-8 bytes.
fn percent_encode(input: &str) -> String {
    let mut out = String::with_capacity(input.len() * 3);
    let mut utf8 = [0u8; 4];

    for ch in input.chars() {
        match ch {
            'A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '_' | '.' | '~' => out.push(ch),
            _ => {
                // Escape each UTF-8 byte of the character individually; the
                // space falls through here and encodes as "%20".
                for &byte in ch.encode_utf8(&mut utf8).as_bytes() {
                    out.push_str(&format!("%{:02X}", byte));
                }
            }
        }
    }

    out
}
370
#[cfg(test)]
mod tests {
    use super::*;

    // ---- JSON canonicalization ------------------------------------------

    // Top-level object keys are emitted in sorted order.
    #[test]
    fn test_canonicalize_json_simple_object() {
        let input = r#"{"z":1,"a":2}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"a":2,"z":1}"#);
    }

    // Sorting applies recursively to nested objects.
    #[test]
    fn test_canonicalize_json_nested_object() {
        let input = r#"{"b":{"d":4,"c":3},"a":1}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"a":1,"b":{"c":3,"d":4}}"#);
    }

    // Insignificant whitespace in the input is dropped by re-serialization.
    #[test]
    fn test_canonicalize_json_with_whitespace() {
        let input = r#"{ "z" : 1 , "a" : 2 }"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"a":2,"z":1}"#);
    }

    // Array element order is part of the value and must NOT be sorted.
    #[test]
    fn test_canonicalize_json_array_preserved() {
        let input = r#"{"arr":[3,1,2]}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"arr":[3,1,2]}"#);
    }

    #[test]
    fn test_canonicalize_json_null() {
        let input = r#"{"a":null}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"a":null}"#);
    }

    #[test]
    fn test_canonicalize_json_boolean() {
        let input = r#"{"b":true,"a":false}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"a":false,"b":true}"#);
    }

    #[test]
    fn test_canonicalize_json_empty_object() {
        let input = r#"{}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{}"#);
    }

    #[test]
    fn test_canonicalize_json_empty_array() {
        let input = r#"[]"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"[]"#);
    }

    // Non-ASCII strings survive canonicalization; input here is presumably
    // already in NFC form, so it round-trips unchanged.
    #[test]
    fn test_canonicalize_json_unicode() {
        let input = r#"{"name":"café"}"#;
        let output = canonicalize_json(input).unwrap();
        assert_eq!(output, r#"{"name":"café"}"#);
    }

    // Malformed JSON is rejected rather than partially canonicalized.
    #[test]
    fn test_canonicalize_json_invalid() {
        let input = r#"{"a":}"#;
        assert!(canonicalize_json(input).is_err());
    }

    // ---- form-urlencoded canonicalization -------------------------------

    // Pairs are re-ordered by key.
    #[test]
    fn test_canonicalize_urlencoded_simple() {
        let input = "z=3&a=1&b=2";
        let output = canonicalize_urlencoded(input).unwrap();
        assert_eq!(output, "a=1&b=2&z=3");
    }

    // Duplicate keys keep their ORIGINAL relative order — the sort must be
    // stable and compare keys only.
    #[test]
    fn test_canonicalize_urlencoded_duplicate_keys() {
        let input = "a=2&a=1&b=3";
        let output = canonicalize_urlencoded(input).unwrap();
        assert_eq!(output, "a=2&a=1&b=3");
    }

    // %20 decodes to a space and re-encodes back to %20.
    #[test]
    fn test_canonicalize_urlencoded_encoded_space() {
        let input = "a=hello%20world";
        let output = canonicalize_urlencoded(input).unwrap();
        assert_eq!(output, "a=hello%20world");
    }

    // '+' also decodes to a space, so it normalizes to %20.
    #[test]
    fn test_canonicalize_urlencoded_plus_space() {
        let input = "a=hello+world";
        let output = canonicalize_urlencoded(input).unwrap();
        assert_eq!(output, "a=hello%20world");
    }

    #[test]
    fn test_canonicalize_urlencoded_empty() {
        let input = "";
        let output = canonicalize_urlencoded(input).unwrap();
        assert_eq!(output, "");
    }

    // A bare key (no '=') is treated as having an empty value.
    #[test]
    fn test_canonicalize_urlencoded_no_value() {
        let input = "a&b=2";
        let output = canonicalize_urlencoded(input).unwrap();
        assert_eq!(output, "a=&b=2");
    }
}