Skip to main content

seam_engine/
escape.rs

/* packages/server/engine/rust/src/escape.rs */

/// Escape non-ASCII characters in JSON string values to `\uXXXX` sequences.
///
/// The input is scanned once, character by character, while tracking whether
/// the current position is inside a JSON string literal:
///
/// * Outside a string every character is copied through unchanged, including
///   any non-ASCII characters (which valid JSON would not contain there).
/// * Inside a string a backslash and the character that follows it are copied
///   verbatim, so existing escapes such as `\"`, `\\` and `\n` are preserved
///   and an escaped quote does not terminate the string.
/// * Inside a string every codepoint above U+007F is replaced by its UTF-16
///   code units written as lowercase `\uXXXX`; codepoints outside the BMP
///   therefore become a surrogate pair (`\uHHHH\uLLLL`).
///
/// Returns a newly allocated `String`; the input is not validated as JSON.
pub fn ascii_escape_json(json: &str) -> String {
  let mut out = String::with_capacity(json.len());
  let mut in_string = false;
  let mut chars = json.chars();

  // `while let` rather than `for`: the backslash arm pulls a second char.
  while let Some(ch) = chars.next() {
    if !in_string {
      // Only an opening quote changes state; everything is copied as-is.
      in_string = ch == '"';
      out.push(ch);
      continue;
    }

    match ch {
      '\\' => {
        // Copy the escape introducer and the escaped character together so
        // that `\"` is not mistaken for the end of the string.
        out.push(ch);
        if let Some(escaped) = chars.next() {
          out.push(escaped);
        }
      }
      '"' => {
        in_string = false;
        out.push(ch);
      }
      _ if ch.is_ascii() => out.push(ch),
      _ => {
        // One UTF-16 unit for BMP chars, a high/low surrogate pair otherwise.
        let mut units = [0u16; 2];
        for &unit in ch.encode_utf16(&mut units).iter() {
          push_unicode_escape(&mut out, unit);
        }
      }
    }
  }
  out
}

/// Append `unit` to `out` as `\u` followed by four lowercase hex digits.
fn push_unicode_escape(out: &mut String, unit: u16) {
  const HEX: &[u8; 16] = b"0123456789abcdef";
  out.push_str("\\u");
  // Most significant nibble first.
  for shift in [12u32, 8, 4, 0] {
    let nibble = usize::from((unit >> shift) & 0xF);
    out.push(char::from(HEX[nibble]));
  }
}

#[cfg(test)]
mod tests {
  //! Unit tests for `ascii_escape_json`.

  use super::*;

  /// Pure-ASCII JSON is returned unchanged.
  #[test]
  fn ascii_passthrough() {
    let input = r#"{"key":"hello"}"#;
    assert_eq!(ascii_escape_json(input), input);
  }

  /// BMP characters inside a string become single `\uXXXX` escapes.
  #[test]
  fn escapes_cjk_in_values() {
    let input = r#"{"msg":"你好"}"#;
    let expected = r#"{"msg":"\u4f60\u597d"}"#;
    assert_eq!(ascii_escape_json(input), expected);
  }

  /// Escape sequences already present in the input are copied verbatim.
  #[test]
  fn preserves_existing_escapes() {
    let input = r#"{"a":"line\nbreak","b":"tab\there"}"#;
    assert_eq!(ascii_escape_json(input), input);
  }

  /// An escaped quote must not be treated as the end of the string.
  #[test]
  fn handles_escaped_quotes() {
    let input = r#"{"a":"say \"hi\""}"#;
    assert_eq!(ascii_escape_json(input), input);
  }

  /// A string ending in an escaped backslash still closes at the next quote,
  /// so a non-ASCII character after it is outside the string and untouched.
  #[test]
  fn escaped_backslash_before_closing_quote() {
    let input = "{\"a\":\"x\\\\\"}\u{e9}";
    assert_eq!(ascii_escape_json(input), input);
  }

  #[test]
  fn non_ascii_outside_strings_untouched() {
    // Non-ASCII outside JSON strings should not appear in valid JSON,
    // but the function should not corrupt them either.
    let input = "// comment: cafe\u{0301}";
    assert_eq!(ascii_escape_json(input), input);
  }

  #[test]
  fn surrogate_pair_for_emoji() {
    // U+1F600 (grinning face) -> \ud83d\ude00 (hex digits are lowercase)
    let input = r#"{"emoji":"😀"}"#;
    let expected = r#"{"emoji":"\ud83d\ude00"}"#;
    assert_eq!(ascii_escape_json(input), expected);
  }

  /// ASCII text around escaped characters is left as-is.
  #[test]
  fn mixed_ascii_and_non_ascii() {
    let input = r#"{"title":"GitHub 仪表盘","cta":"View"}"#;
    let expected = r#"{"title":"GitHub \u4eea\u8868\u76d8","cta":"View"}"#;
    assert_eq!(ascii_escape_json(input), expected);
  }

  /// Documents containing no strings at all are returned unchanged.
  #[test]
  fn empty_json() {
    assert_eq!(ascii_escape_json("{}"), "{}");
    assert_eq!(ascii_escape_json("[]"), "[]");
  }
}