// sea_core/parser/string_utils.rs

/// Decodes the backslash escape sequences in `s` and returns the unescaped text.
///
/// Recognised escapes:
///
/// * `\\` — backslash
/// * `\"` — double quote
/// * `\n` — line feed
/// * `\r` — carriage return
/// * `\t` — horizontal tab
/// * `\u{H…}` — the Unicode scalar value written as 1 to 6 hexadecimal digits
///
/// Every character that is not part of an escape sequence is copied through
/// unchanged.
///
/// # Errors
///
/// Returns a human-readable message when:
///
/// * a backslash is the last character of the input,
/// * a backslash is followed by a character that is not listed above,
/// * `\u` is not immediately followed by `{`,
/// * a `\u{` escape contains a non-hexadecimal character or is never closed,
/// * a `\u{…}` escape has no digits or more than six digits,
/// * the digits name a value that is not a Unicode scalar value (a surrogate,
///   or anything above `U+10FFFF`).
pub fn unescape_string(s: &str) -> Result<String, String> {
    // Every escape sequence is longer (in bytes) than the character it decodes
    // to, so the input length is an upper bound for the output length and the
    // buffer never has to grow.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        let decoded = match chars.next() {
            Some('\\') => '\\',
            Some('"') => '"',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some('u') => parse_unicode_escape(&mut chars)?,
            Some(c) => return Err(format!("Unknown escape sequence: \\{}", c)),
            None => return Err("Unexpected end of string after backslash".to_string()),
        };
        result.push(decoded);
    }

    Ok(result)
}

/// Parses the `{H…}` part of a `\u{H…}` escape; `chars` must be positioned
/// directly after the `u`.
///
/// On success the iterator is left just past the closing `}`.
fn parse_unicode_escape(chars: &mut std::str::Chars<'_>) -> Result<char, String> {
    /// Largest number of hex digits accepted between the braces.
    const MAX_DIGITS: usize = 6;

    if chars.next() != Some('{') {
        return Err("Expected '{' after \\u".to_string());
    }

    let mut digit_count: usize = 0;
    let mut code_point: u32 = 0;

    loop {
        match chars.next() {
            Some('}') => break,
            Some(c) => match c.to_digit(16) {
                Some(digit) => {
                    digit_count += 1;
                    // Digits past the limit are only counted (so the length
                    // error below reports the real length); folding them in
                    // would overflow the accumulator.
                    if digit_count <= MAX_DIGITS {
                        code_point = code_point * 16 + digit;
                    }
                }
                None => {
                    return Err(format!("Invalid character in unicode escape: {}", c));
                }
            },
            None => return Err("Unterminated unicode escape sequence".to_string()),
        }
    }

    if digit_count == 0 || digit_count > MAX_DIGITS {
        return Err(format!("Invalid unicode escape length: {}", digit_count));
    }

    // `char::from_u32` rejects surrogates and values above U+10FFFF.
    char::from_u32(code_point)
        .ok_or_else(|| format!("Invalid unicode code point: U+{:X}", code_point))
}

#[cfg(test)]
mod tests {
    use super::*;

    // Input literals below are Rust string literals, so `"\\n"` is the
    // two-character text `\n` that `unescape_string` is expected to decode.

    #[test]
    fn test_unescape_basic() {
        assert_eq!(unescape_string("hello").unwrap(), "hello");
    }

    #[test]
    fn test_unescape_backslash() {
        assert_eq!(unescape_string("a\\\\b").unwrap(), "a\\b");
    }

    #[test]
    fn test_unescape_quote() {
        assert_eq!(
            unescape_string("say \\\"hello\\\"").unwrap(),
            "say \"hello\""
        );
    }

    #[test]
    fn test_unescape_newline() {
        assert_eq!(unescape_string("line1\\nline2").unwrap(), "line1\nline2");
    }

    #[test]
    fn test_unescape_tab() {
        assert_eq!(unescape_string("col1\\tcol2").unwrap(), "col1\tcol2");
    }

    #[test]
    fn test_unescape_carriage_return() {
        assert_eq!(unescape_string("text\\rmore").unwrap(), "text\rmore");
    }

    #[test]
    fn test_unescape_unicode() {
        // Expected values are written as Rust escapes so the assertions do not
        // depend on how this file's bytes are encoded or re-encoded.
        assert_eq!(unescape_string("\\u{1F600}").unwrap(), "\u{1F600}"); // grinning face
        assert_eq!(unescape_string("\\u{4E2D}").unwrap(), "\u{4E2D}"); // CJK "middle"
        assert_eq!(unescape_string("\\u{41}").unwrap(), "A");
    }

    #[test]
    fn test_unescape_mixed() {
        assert_eq!(
            unescape_string("Hello\\nWorld\\t\\u{1F44B}").unwrap(),
            "Hello\nWorld\t\u{1F44B}" // waving hand
        );
    }

    #[test]
    fn test_unescape_invalid_escape() {
        assert!(unescape_string("invalid\\x").is_err());
    }

    #[test]
    fn test_unescape_trailing_backslash() {
        assert!(unescape_string("dangling\\").is_err());
    }

    #[test]
    fn test_unescape_unicode_missing_brace() {
        assert!(unescape_string("\\u41").is_err());
    }

    #[test]
    fn test_unescape_unterminated_unicode() {
        assert!(unescape_string("\\u{123").is_err());
    }

    #[test]
    fn test_unescape_unicode_bad_length() {
        // No digits at all, and more than the six digits allowed.
        assert!(unescape_string("\\u{}").is_err());
        assert!(unescape_string("\\u{1234567}").is_err());
    }

    #[test]
    fn test_unescape_invalid_unicode_codepoint() {
        // One past the largest Unicode scalar value (U+10FFFF).
        assert!(unescape_string("\\u{110000}").is_err());
        // Surrogates are not scalar values either.
        assert!(unescape_string("\\u{D800}").is_err());
    }
}