sea_core/parser/
string_utils.rs

/// Decodes the backslash escape sequences in `s` and returns the unescaped text.
///
/// Recognised escapes:
///
/// | Escape       | Result                                   |
/// |--------------|------------------------------------------|
/// | `\\`         | a single backslash                       |
/// | `\"`         | a double quote                           |
/// | `\n`         | line feed                                |
/// | `\r`         | carriage return                          |
/// | `\t`         | horizontal tab                           |
/// | `\u{XXXXXX}` | the scalar value given by 1-6 hex digits |
///
/// Every character that is not part of an escape is copied through unchanged.
///
/// # Errors
///
/// Returns a human-readable message when:
/// - the input ends directly after a backslash,
/// - a backslash is followed by a character that is not listed above,
/// - `\u` is not followed by `{`,
/// - a `\u{...}` escape is unterminated, contains a non-hex character, is
///   empty, or has more than six digits,
/// - the hex value is not a valid Unicode scalar value (a surrogate, or
///   anything above `U+10FFFF`).
pub fn unescape_string(s: &str) -> Result<String, String> {
    // Every escape is at least as long (in bytes) as the UTF-8 encoding of the
    // character it produces, so the input length is an upper bound for the
    // output length and the buffer never has to grow.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    // `chars` is advanced inside the loop body as well, so a plain `for` loop
    // cannot be used here.
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        let decoded = match chars.next() {
            Some('\\') => '\\',
            Some('"') => '"',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some('u') => parse_unicode_escape(&mut chars)?,
            Some(c) => return Err(format!("Unknown escape sequence: \\{}", c)),
            None => return Err("Unexpected end of string after backslash".to_string()),
        };
        result.push(decoded);
    }

    Ok(result)
}

/// Parses the `{XXXXXX}` tail of a `\u{XXXXXX}` escape; the leading `\u` must
/// already have been consumed from `chars`.
///
/// On success the iterator is positioned just past the closing `}`.
fn parse_unicode_escape(chars: &mut std::str::Chars<'_>) -> Result<char, String> {
    /// Six hex digits are enough to express the largest scalar value, `U+10FFFF`.
    const MAX_HEX_DIGITS: usize = 6;

    if chars.next() != Some('{') {
        return Err("Expected '{' after \\u".to_string());
    }

    let mut code_point: u32 = 0;
    let mut digit_count: usize = 0;

    loop {
        match chars.next() {
            Some('}') => break,
            // `to_digit(16)` accepts exactly the ASCII hex digits 0-9, a-f, A-F.
            Some(c) => match c.to_digit(16) {
                Some(digit) => {
                    digit_count += 1;
                    // Keep counting so the length error reports the real
                    // digit count, but stop accumulating once the escape is
                    // already too long; this keeps `code_point` within `u32`.
                    if digit_count <= MAX_HEX_DIGITS {
                        code_point = code_point * 16 + digit;
                    }
                }
                None => {
                    return Err(format!("Invalid character in unicode escape: {}", c));
                }
            },
            None => return Err("Unterminated unicode escape sequence".to_string()),
        }
    }

    if digit_count == 0 || digit_count > MAX_HEX_DIGITS {
        return Err(format!("Invalid unicode escape length: {}", digit_count));
    }

    // Rejects surrogates (U+D800..=U+DFFF) and values above U+10FFFF.
    char::from_u32(code_point)
        .ok_or_else(|| format!("Invalid unicode code point: U+{:X}", code_point))
}
71
/// Unit tests for `unescape_string`.
///
/// Expected non-ASCII characters are written as `\u{...}` literals so the
/// assertions do not depend on how this source file is encoded or transcoded.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_unescape_basic() {
        // Text without any backslash is returned unchanged.
        assert_eq!(unescape_string("hello").unwrap(), "hello");
    }

    #[test]
    fn test_unescape_backslash() {
        assert_eq!(unescape_string("a\\\\b").unwrap(), "a\\b");
    }

    #[test]
    fn test_unescape_quote() {
        assert_eq!(
            unescape_string("say \\\"hello\\\"").unwrap(),
            "say \"hello\""
        );
    }

    #[test]
    fn test_unescape_newline() {
        assert_eq!(unescape_string("line1\\nline2").unwrap(), "line1\nline2");
    }

    #[test]
    fn test_unescape_tab() {
        assert_eq!(unescape_string("col1\\tcol2").unwrap(), "col1\tcol2");
    }

    #[test]
    fn test_unescape_carriage_return() {
        assert_eq!(unescape_string("text\\rmore").unwrap(), "text\rmore");
    }

    #[test]
    fn test_unescape_unicode() {
        // U+1F600 GRINNING FACE (outside the Basic Multilingual Plane).
        assert_eq!(unescape_string("\\u{1F600}").unwrap(), "\u{1F600}");
        // U+4E2D, a CJK ideograph (three bytes in UTF-8).
        assert_eq!(unescape_string("\\u{4E2D}").unwrap(), "\u{4E2D}");
        assert_eq!(unescape_string("\\u{41}").unwrap(), "A");
    }

    #[test]
    fn test_unescape_mixed() {
        // U+1F44B WAVING HAND SIGN following ordinary escapes.
        assert_eq!(
            unescape_string("Hello\\nWorld\\t\\u{1F44B}").unwrap(),
            "Hello\nWorld\t\u{1F44B}"
        );
    }

    #[test]
    fn test_unescape_invalid_escape() {
        assert!(unescape_string("invalid\\x").is_err());
    }

    #[test]
    fn test_unescape_unterminated_unicode() {
        assert!(unescape_string("\\u{123").is_err());
    }

    #[test]
    fn test_unescape_invalid_unicode_codepoint() {
        assert!(unescape_string("\\u{110000}").is_err()); // Beyond valid Unicode range
    }
}