toon_format/utils/
string.rs

1use crate::{
2    types::Delimiter,
3    utils::literal,
4};
5
/// Produce the escaped form of `s` for use inside a double-quoted string.
///
/// Newline, carriage return, tab, double quote and backslash are each
/// replaced by their two-character backslash sequence (`\n`, `\r`, `\t`,
/// `\"`, `\\`). Every other character is copied through unchanged.
///
/// The surrounding quotes are not added here.
pub fn escape_string(s: &str) -> String {
    // The input length is a lower bound for the output length.
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                '"' => Some("\\\""),
                '\\' => Some("\\\\"),
                _ => None,
            };

            match replacement {
                Some(sequence) => escaped.push_str(sequence),
                None => escaped.push(ch),
            }

            escaped
        })
}
23
/// Decode the backslash escapes in the body of a quoted string.
///
/// Per TOON spec §7.1 exactly five escape sequences are recognised:
/// - `\\` → `\`
/// - `\"` → `"`
/// - `\n` → newline
/// - `\r` → carriage return
/// - `\t` → tab
///
/// Characters that are not part of an escape sequence are copied verbatim.
///
/// # Errors
///
/// Returns a descriptive message when a backslash is followed by any other
/// character, or when a backslash is the last character of `s`. The position
/// in the message is the 1-based character index of the offending backslash.
pub fn unescape_string(s: &str) -> Result<String, String> {
    let mut decoded = String::with_capacity(s.len());
    let mut remaining = s.chars();
    // Number of characters consumed so far; after reading a backslash this is
    // its 1-based index, which is what the error messages report.
    let mut position = 0;

    while let Some(ch) = remaining.next() {
        position += 1;

        if ch != '\\' {
            decoded.push(ch);
            continue;
        }

        // A backslash must be followed by the character it escapes.
        let Some(next) = remaining.next() else {
            return Err(format!(
                "Unterminated escape sequence at end of string (position {position})",
            ));
        };

        let unescaped = match next {
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            '"' => '"',
            '\\' => '\\',
            _ => {
                return Err(format!(
                    "Invalid escape sequence '\\{next}' at position {position}. Only \
                     \\\\, \\\", \\n, \\r, \\t are valid",
                ));
            }
        };

        decoded.push(unescaped);
        // Account for the escaped character that was consumed as well.
        position += 1;
    }

    Ok(decoded)
}
94
/// Check whether `key` can be written without quotes.
///
/// A key is valid unquoted only if it matches the TOON key pattern
/// `^[A-Za-z_][A-Za-z0-9_.]*$`:
/// - the first character is an ASCII letter or `_`;
/// - every following character is an ASCII letter, ASCII digit, `_` or `.`.
///
/// The empty string is never a valid unquoted key. Non-ASCII letters and
/// digits (for example `é` or `١`) are rejected so that such keys get quoted.
pub fn is_valid_unquoted_key(key: &str) -> bool {
    let mut chars = key.chars();

    // An empty key has no first character and must be quoted.
    let Some(first) = chars.next() else {
        return false;
    };

    // ASCII-only checks: the Unicode-aware `is_alphabetic`/`is_alphanumeric`
    // would accept characters outside the pattern above.
    if !(first.is_ascii_alphabetic() || first == '_') {
        return false;
    }

    chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
}
115
116/// Determine if a string needs quoting based on content and delimiter.
117pub fn needs_quoting(s: &str, delimiter: char) -> bool {
118    if s.is_empty() {
119        return true;
120    }
121
122    if literal::is_literal_like(s) {
123        return true;
124    }
125
126    if s.chars().any(literal::is_structural_char) {
127        return true;
128    }
129
130    if s.contains('\\') || s.contains('"') {
131        return true;
132    }
133
134    if s.contains(delimiter) {
135        return true;
136    }
137
138    if s.contains('\n') || s.contains('\r') || s.contains('\t') {
139        return true;
140    }
141
142    if s.starts_with(char::is_whitespace) || s.ends_with(char::is_whitespace) {
143        return true;
144    }
145
146    if s.starts_with('-') {
147        return true;
148    }
149
150    // Check for leading zeros (e.g., "05", "007", "0123")
151    // Numbers with leading zeros must be quoted
152    if s.starts_with('0') && s.len() > 1 && s.chars().nth(1).is_some_and(|c| c.is_ascii_digit()) {
153        return true;
154    }
155
156    false
157}
158
159/// Quote and escape a string.
160pub fn quote_string(s: &str) -> String {
161    format!("\"{}\"", escape_string(s))
162}
163
164pub fn split_by_delimiter(s: &str, delimiter: Delimiter) -> Vec<String> {
165    let mut result = Vec::new();
166    let mut current = String::new();
167    let mut in_quotes = false;
168    let chars = s.chars().peekable();
169    let delim_char = delimiter.as_char();
170
171    for ch in chars {
172        if ch == '"' && (current.is_empty() || !current.ends_with('\\')) {
173            in_quotes = !in_quotes;
174            current.push(ch);
175        } else if ch == delim_char && !in_quotes {
176            result.push(current.trim().to_string());
177            current.clear();
178        } else {
179            current.push(ch);
180        }
181    }
182
183    if !current.is_empty() {
184        result.push(current.trim().to_string());
185    }
186
187    result
188}
189
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain text is untouched; newline, quote and backslash are escaped.
    #[test]
    fn test_escape_string() {
        let cases = [
            ("hello", "hello"),
            ("hello\nworld", "hello\\nworld"),
            ("say \"hi\"", "say \\\"hi\\\""),
            ("back\\slash", "back\\\\slash"),
        ];

        for (input, expected) in cases {
            assert_eq!(escape_string(input), expected);
        }
    }

    /// Every supported escape sequence decodes to its character.
    #[test]
    fn test_unescape_string() {
        let cases = [
            ("hello", "hello"),
            ("hello\\nworld", "hello\nworld"),
            ("say \\\"hi\\\"", "say \"hi\""),
            ("back\\\\slash", "back\\slash"),
            ("tab\\there", "tab\there"),
            ("return\\rhere", "return\rhere"),
        ];

        for (input, expected) in cases {
            assert_eq!(unescape_string(input).unwrap(), expected);
        }
    }

    /// Unsupported escapes and a dangling backslash are rejected.
    #[test]
    fn test_unescape_string_invalid_escapes() {
        let rejected = [
            "bad\\xescape",
            "bad\\uescape",
            "bad\\0escape",
            "bad\\aescape",
            // Unterminated escape at end
            "ends\\",
        ];

        for input in rejected {
            assert!(unescape_string(input).is_err());
        }
    }

    /// The error text names the problem and the offending sequence.
    #[test]
    fn test_unescape_string_error_messages() {
        let outcome = unescape_string("bad\\x");
        assert!(outcome.is_err());

        let message = outcome.unwrap_err();
        for fragment in ["Invalid escape sequence", "\\x"] {
            assert!(message.contains(fragment));
        }
    }

    /// Quoting is required for empty, literal-like, structural,
    /// delimiter-bearing and whitespace-edged strings only.
    #[test]
    fn test_needs_quoting() {
        let comma = Delimiter::Comma.as_char();
        let pipe = Delimiter::Pipe.as_char();

        let must_quote = [
            "",
            "true",
            "false",
            "null",
            "123",
            "hello[world]",
            "key:value",
            "a,b",
            " hello",
            "hello ",
        ];
        for value in must_quote {
            assert!(needs_quoting(value, comma));
        }

        let bare = ["hello world", "hello", "world", "helloworld"];
        for value in bare {
            assert!(!needs_quoting(value, comma));
        }

        // A comma is harmless when the active delimiter is a pipe.
        assert!(!needs_quoting("a,b", pipe));
    }

    /// Quoting wraps the escaped text in double quotes.
    #[test]
    fn test_quote_string() {
        let cases = [
            ("hello", "\"hello\""),
            ("hello\nworld", "\"hello\\nworld\""),
        ];

        for (input, expected) in cases {
            assert_eq!(quote_string(input), expected);
        }
    }

    /// Fields are trimmed, and delimiters inside quotes do not split.
    #[test]
    fn test_split_by_delimiter() {
        let comma = Delimiter::Comma;

        let cases = [
            ("a,b,c", vec!["a", "b", "c"]),
            ("a, b, c", vec!["a", "b", "c"]),
            ("\"a,b\",c", vec!["\"a,b\"", "c"]),
        ];

        for (input, expected) in cases {
            assert_eq!(split_by_delimiter(input, comma), expected);
        }
    }

    /// Keys made of letters, digits, `_` and `.` (not starting with a digit
    /// or `.`) are accepted; anything else is rejected.
    #[test]
    fn test_is_valid_unquoted_key() {
        let accepted = [
            "normal_key",
            "key123",
            "key.value",
            "_private",
            "KeyName",
            "key_name",
            "key.name.sub",
            "a",
            "_",
            "key_123.value",
            // A trailing dot is allowed.
            "key.value.sub.",
            "key.",
        ];
        for key in accepted {
            assert!(is_valid_unquoted_key(key));
        }

        let rejected = [
            "",
            "123",
            "key:value",
            "key-value",
            "key value",
            ".key",
            "key[value]",
            "key{value}",
        ];
        for key in rejected {
            assert!(!is_valid_unquoted_key(key));
        }
    }
}